Re: [PATCH 5/8] nowait aio: return on congested block device

2017-05-11 Thread Goldwyn Rodrigues


On 05/11/2017 02:44 AM, Christoph Hellwig wrote:
> Looks fine,
> 
> Reviewed-by: Christoph Hellwig 
> 
> Although lifting the make_request limit is something a lot of users
> would appreciate in the near future..
> 

Yes, I understand. That will be on my todo list next on priority.

-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-05-11 Thread Christoph Hellwig
Looks fine,

Reviewed-by: Christoph Hellwig 

Although lifting the make_request limit is something a lot of users
would appreciate in the near future..
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/8] nowait aio: return on congested block device

2017-05-09 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

A new bio operation flag REQ_NOWAIT is introduced to identify bio's
orignating from iocb with IOCB_NOWAIT. This flag indicates
to return immediately if a request cannot be made instead
of retrying.

Stacked devices such as md (the ones with make_request_fn hooks)
currently are not supported because it may block for housekeeping.
For example, an md can have a part of the device suspended.
For this reason, only request based devices are supported.
In the future, this feature will be expanded to stacked devices
by teaching them how to handle the REQ_NOWAIT flags.

Signed-off-by: Goldwyn Rodrigues 
---
 block/blk-core.c  | 24 ++--
 block/blk-mq-sched.c  |  3 +++
 block/blk-mq.c|  4 
 fs/direct-io.c| 10 --
 include/linux/bio.h   |  6 ++
 include/linux/blk_types.h |  2 ++
 6 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d772c221cc17..effe934b806b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1232,6 +1232,11 @@ static struct request *get_request(struct request_queue 
*q, unsigned int op,
if (!IS_ERR(rq))
return rq;
 
+   if (op & REQ_NOWAIT) {
+   blk_put_rl(rl);
+   return ERR_PTR(-EAGAIN);
+   }
+
if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) 
{
blk_put_rl(rl);
return rq;
@@ -1870,6 +1875,17 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
 
+   /*
+* For a REQ_NOWAIT based request, return -EOPNOTSUPP
+* if queue does not have QUEUE_FLAG_NOWAIT_SUPPORT set
+* and if it is not a request based queue.
+*/
+
+   if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q)) {
+   err = -EOPNOTSUPP;
+   goto end_io;
+   }
+
part = bio->bi_bdev->bd_part;
if (should_fail_request(part, bio->bi_iter.bi_size) ||
should_fail_request(_to_disk(part)->part0,
@@ -2021,7 +2037,7 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-   if (likely(blk_queue_enter(q, false) == 0)) {
+   if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
struct bio_list lower, same;
 
/* Create a fresh bio_list for all subordinate requests 
*/
@@ -2046,7 +2062,11 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_merge(_list_on_stack[0], );
bio_list_merge(_list_on_stack[0], 
_list_on_stack[1]);
} else {
-   bio_io_error(bio);
+   if (unlikely(!blk_queue_dying(q) &&
+   (bio->bi_opf & REQ_NOWAIT)))
+   bio_wouldblock_error(bio);
+   else
+   bio_io_error(bio);
}
bio = bio_list_pop(_list_on_stack[0]);
} while (bio);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c974a1bbf4cb..019d881d62b7 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -119,6 +119,9 @@ struct request *blk_mq_sched_get_request(struct 
request_queue *q,
if (likely(!data->hctx))
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
 
+   if (op & REQ_NOWAIT)
+   data->flags |= BLK_MQ_REQ_NOWAIT;
+
if (e) {
data->flags |= BLK_MQ_REQ_INTERNAL;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c7836a1ded97..d7613ae6a269 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1538,6 +1538,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue 
*q, struct bio *bio)
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+   if (bio->bi_opf & REQ_NOWAIT)
+   bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
 
@@ -1662,6 +1664,8 @@ static blk_qc_t blk_sq_make_request(struct request_queue 
*q, struct bio *bio)
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+   if (bio->bi_opf & REQ_NOWAIT)
+   bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a04ebea77de8..139ebd5ae1c7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -480,8 +480,12 @@ static int dio_bio_complete(struct dio *dio, struct bio 
*bio)
unsigned i;
int err;
 
-   if (bio->bi_error)
-   dio->io_error = -EIO;
+   if (bio->bi_error) {
+   if (bio->bi_error == -EAGAIN && 

Re: [PATCH 5/8] nowait aio: return on congested block device

2017-04-24 Thread Jens Axboe
On 04/24/2017 03:10 PM, Goldwyn Rodrigues wrote:
> 
> 
> On 04/19/2017 01:45 AM, Christoph Hellwig wrote:
>> On Fri, Apr 14, 2017 at 07:02:54AM -0500, Goldwyn Rodrigues wrote:
>>> From: Goldwyn Rodrigues 
>>>
>>
>>> +   /* Request queue supports BIO_NOWAIT */
>>> +   queue_flag_set_unlocked(QUEUE_FLAG_NOWAIT, q);
>>
>> BIO_NOWAIT is gone.  And the comment would not be needed if the
>> flag had a more descriptive name, e.g. QUEUE_FLAG_NOWAIT_SUPPORT.
>>
>> And I think all request based drivers should set the flag implicitly
>> as ->queuecommand can't sleep, and ->queue_rq only when it's always
>> offloaded to a workqueue when the BLK_MQ_F_BLOCKING flag is set.
>>
> 
> We introduced QUEUE_FLAG_NOWAIT for devices which would not wait for
> request completions. The ones which wait are MD devices because of sync
> or suspend operations.
> 
> The only user of BLK_MQ_F_NONBLOCKING seems to be nbd. As you mentioned,
> it uses the flag to offload it to a workqueue.
> 
> The other way to do it implicitly is to change the flag to
> BLK_MAY_BLOCK_REQS and use it for devices which do wait such as md/dm.
> Is that what you are hinting at? Or do you have something else in mind?

You are misunderstanding. What Christoph (correctly) says is that request
based drivers do not block, so all of those are fine. What you need to
worry about is drivers that are NOT request based. In other words, drivers
that hook into make_request_fn and process bio's.

-- 
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-04-24 Thread Goldwyn Rodrigues


On 04/19/2017 01:45 AM, Christoph Hellwig wrote:
> On Fri, Apr 14, 2017 at 07:02:54AM -0500, Goldwyn Rodrigues wrote:
>> From: Goldwyn Rodrigues 
>>
> 
>> +/* Request queue supports BIO_NOWAIT */
>> +queue_flag_set_unlocked(QUEUE_FLAG_NOWAIT, q);
> 
> BIO_NOWAIT is gone.  And the comment would not be needed if the
> flag had a more descriptive name, e.g. QUEUE_FLAG_NOWAIT_SUPPORT.
> 
> And I think all request based drivers should set the flag implicitly
> as ->queuecommand can't sleep, and ->queue_rq only when it's always
> offloaded to a workqueue when the BLK_MQ_F_BLOCKING flag is set.
> 

We introduced QUEUE_FLAG_NOWAIT for devices which would not wait for
request completions. The ones which wait are MD devices because of sync
or suspend operations.

The only user of BLK_MQ_F_NONBLOCKING seems to be nbd. As you mentioned,
it uses the flag to offload it to a workqueue.

The other way to do it implicitly is to change the flag to
BLK_MAY_BLOCK_REQS and use it for devices which do wait such as md/dm.
Is that what you are hinting at? Or do you have something else in mind?


-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-04-20 Thread Jan Kara
On Wed 19-04-17 10:21:39, Goldwyn Rodrigues wrote:
> 
> 
> On 04/19/2017 01:45 AM, Christoph Hellwig wrote:
> > 
> >> +  if (bio->bi_opf & REQ_NOWAIT) {
> >> +  if (!blk_queue_nowait(q)) {
> >> +  err = -EOPNOTSUPP;
> >> +  goto end_io;
> >> +  }
> >> +  if (!(bio->bi_opf & REQ_SYNC)) {
> > 
> > I don't understand this check at all..
> 
> It is to ensure at the block layer that NOWAIT comes only for DIRECT
> calls only. I should probably change it to REQ_SYNC | REQ_IDLE.

Ouch. Checking 'REQ_SYNC' for this is

a) unreliable hack
b) layering violation

You just don't care why someone marked bio with REQ_NOWAIT at this place.
Just obey the request if you can, return error if you cannot, but advisory
REQ_SYNC or REQ_IDLE flags have nothing to do with the ability of the block
layer to submit the bio without blocking...

Honza
-- 
Jan Kara 
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-04-19 Thread Goldwyn Rodrigues


On 04/19/2017 01:45 AM, Christoph Hellwig wrote:
> 
>> +if (bio->bi_opf & REQ_NOWAIT) {
>> +if (!blk_queue_nowait(q)) {
>> +err = -EOPNOTSUPP;
>> +goto end_io;
>> +}
>> +if (!(bio->bi_opf & REQ_SYNC)) {
> 
> I don't understand this check at all..

It is to ensure at the block layer that NOWAIT comes only for DIRECT
calls only. I should probably change it to REQ_SYNC | REQ_IDLE.

> 
>> +if (unlikely(!blk_queue_dying(q) && (bio->bi_opf & 
>> REQ_NOWAIT)))
> 
> Please break lines after 80 characters.
> 
>> @@ -119,6 +119,9 @@ struct request *blk_mq_sched_get_request(struct 
>> request_queue *q,
>>  if (likely(!data->hctx))
>>  data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
>>  
>> +if (bio && (bio->bi_opf & REQ_NOWAIT))
>> +data->flags |= BLK_MQ_REQ_NOWAIT;
> 
> Check the op flag again here.
> 
>> +++ b/block/blk-mq.c
>> @@ -1538,6 +1538,8 @@ static blk_qc_t blk_mq_make_request(struct 
>> request_queue *q, struct bio *bio)
>>  rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
>>  if (unlikely(!rq)) {
>>  __wbt_done(q->rq_wb, wb_acct);
>> +if (bio && (bio->bi_opf & REQ_NOWAIT))
>> +bio_wouldblock_error(bio);
> 
> bio iѕ dereferences unconditionally above, so you can do the same.
> 
>> @@ -1662,6 +1664,8 @@ static blk_qc_t blk_sq_make_request(struct 
>> request_queue *q, struct bio *bio)
>>  rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
>>  if (unlikely(!rq)) {
>>  __wbt_done(q->rq_wb, wb_acct);
>> +if (bio && (bio->bi_opf & REQ_NOWAIT))
>> +bio_wouldblock_error(bio);
> 
> Same here.  Although blk_sq_make_request is gone anyway in the current
> block tree..
> 
>> +/* Request queue supports BIO_NOWAIT */
>> +queue_flag_set_unlocked(QUEUE_FLAG_NOWAIT, q);
> 
> BIO_NOWAIT is gone.  And the comment would not be needed if the
> flag had a more descriptive name, e.g. QUEUE_FLAG_NOWAIT_SUPPORT.
> 
> And I think all request based drivers should set the flag implicitly
> as ->queuecommand can't sleep, and ->queue_rq only when it's always
> offloaded to a workqueue when the BLK_MQ_F_BLOCKING flag is set.
> 

Yes, Do we have a central point (like a probe() function call?) where
this can be done?


-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-04-19 Thread Christoph Hellwig
On Fri, Apr 14, 2017 at 07:02:54AM -0500, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues 
> 
> A new bio operation flag REQ_NOWAIT is introduced to identify bio's

s/bio/block/

> @@ -1232,6 +1232,11 @@ static struct request *get_request(struct 
> request_queue *q, unsigned int op,
>   if (!IS_ERR(rq))
>   return rq;
>  
> + if (bio && (bio->bi_opf & REQ_NOWAIT)) {
> + blk_put_rl(rl);
> + return ERR_PTR(-EAGAIN);
> + }

Please check the op argument instead of touching bio.

> + if (bio->bi_opf & REQ_NOWAIT) {
> + if (!blk_queue_nowait(q)) {
> + err = -EOPNOTSUPP;
> + goto end_io;
> + }
> + if (!(bio->bi_opf & REQ_SYNC)) {

I don't understand this check at all..

> + if (unlikely(!blk_queue_dying(q) && (bio->bi_opf & 
> REQ_NOWAIT)))

Please break lines after 80 characters.

> @@ -119,6 +119,9 @@ struct request *blk_mq_sched_get_request(struct 
> request_queue *q,
>   if (likely(!data->hctx))
>   data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
>  
> + if (bio && (bio->bi_opf & REQ_NOWAIT))
> + data->flags |= BLK_MQ_REQ_NOWAIT;

Check the op flag again here.

> +++ b/block/blk-mq.c
> @@ -1538,6 +1538,8 @@ static blk_qc_t blk_mq_make_request(struct 
> request_queue *q, struct bio *bio)
>   rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
>   if (unlikely(!rq)) {
>   __wbt_done(q->rq_wb, wb_acct);
> + if (bio && (bio->bi_opf & REQ_NOWAIT))
> + bio_wouldblock_error(bio);

bio iѕ dereferences unconditionally above, so you can do the same.

> @@ -1662,6 +1664,8 @@ static blk_qc_t blk_sq_make_request(struct 
> request_queue *q, struct bio *bio)
>   rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
>   if (unlikely(!rq)) {
>   __wbt_done(q->rq_wb, wb_acct);
> + if (bio && (bio->bi_opf & REQ_NOWAIT))
> + bio_wouldblock_error(bio);

Same here.  Although blk_sq_make_request is gone anyway in the current
block tree..

> + /* Request queue supports BIO_NOWAIT */
> + queue_flag_set_unlocked(QUEUE_FLAG_NOWAIT, q);

BIO_NOWAIT is gone.  And the comment would not be needed if the
flag had a more descriptive name, e.g. QUEUE_FLAG_NOWAIT_SUPPORT.

And I think all request based drivers should set the flag implicitly
as ->queuecommand can't sleep, and ->queue_rq only when it's always
offloaded to a workqueue when the BLK_MQ_F_BLOCKING flag is set.

> --- a/fs/direct-io.c
> +++ b/fs/direct-io.c
> @@ -480,8 +480,12 @@ static int dio_bio_complete(struct dio *dio, struct bio 
> *bio)
>   unsigned i;
>   int err;
>  
> - if (bio->bi_error)
> - dio->io_error = -EIO;
> + if (bio->bi_error) {
> + if (bio->bi_opf & REQ_NOWAIT)
> + dio->io_error = -EAGAIN;
> + else
> + dio->io_error = -EIO;
> + }

Huh?  Once REQ_NOWAIT is set all errors are -EAGAIN?
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/8] nowait aio: return on congested block device

2017-04-14 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

A new bio operation flag REQ_NOWAIT is introduced to identify bio's
orignating from iocb with IOCB_NOWAIT. This flag indicates
to return immediately if a request cannot be made instead
of retrying.

To facilitate this, QUEUE_FLAG_NOWAIT is set to devices
which support this. While currently this is set to
virtio and sd only. Support to more devices will be added soon
once I am sure they don't block. Currently blocks such as dm/md
block while performing sync.

Signed-off-by: Goldwyn Rodrigues 
---
 block/blk-core.c   | 24 ++--
 block/blk-mq-sched.c   |  3 +++
 block/blk-mq.c |  4 
 drivers/block/virtio_blk.c |  3 +++
 drivers/scsi/sd.c  |  3 +++
 fs/direct-io.c | 10 --
 include/linux/bio.h|  6 ++
 include/linux/blk_types.h  |  2 ++
 include/linux/blkdev.h |  3 +++
 9 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d772c221cc17..54698521756b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1232,6 +1232,11 @@ static struct request *get_request(struct request_queue 
*q, unsigned int op,
if (!IS_ERR(rq))
return rq;
 
+   if (bio && (bio->bi_opf & REQ_NOWAIT)) {
+   blk_put_rl(rl);
+   return ERR_PTR(-EAGAIN);
+   }
+
if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) 
{
blk_put_rl(rl);
return rq;
@@ -1870,6 +1875,18 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
 
+   if (bio->bi_opf & REQ_NOWAIT) {
+   if (!blk_queue_nowait(q)) {
+   err = -EOPNOTSUPP;
+   goto end_io;
+   }
+   if (!(bio->bi_opf & REQ_SYNC)) {
+   err = -EINVAL;
+   goto end_io;
+   }
+   }
+
+
part = bio->bi_bdev->bd_part;
if (should_fail_request(part, bio->bi_iter.bi_size) ||
should_fail_request(_to_disk(part)->part0,
@@ -2021,7 +2038,7 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-   if (likely(blk_queue_enter(q, false) == 0)) {
+   if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
struct bio_list lower, same;
 
/* Create a fresh bio_list for all subordinate requests 
*/
@@ -2046,7 +2063,10 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_merge(_list_on_stack[0], );
bio_list_merge(_list_on_stack[0], 
_list_on_stack[1]);
} else {
-   bio_io_error(bio);
+   if (unlikely(!blk_queue_dying(q) && (bio->bi_opf & 
REQ_NOWAIT)))
+   bio_wouldblock_error(bio);
+   else
+   bio_io_error(bio);
}
bio = bio_list_pop(_list_on_stack[0]);
} while (bio);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c974a1bbf4cb..9f88190ff395 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -119,6 +119,9 @@ struct request *blk_mq_sched_get_request(struct 
request_queue *q,
if (likely(!data->hctx))
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
 
+   if (bio && (bio->bi_opf & REQ_NOWAIT))
+   data->flags |= BLK_MQ_REQ_NOWAIT;
+
if (e) {
data->flags |= BLK_MQ_REQ_INTERNAL;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 572966f49596..8b9b1a411ce2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1538,6 +1538,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue 
*q, struct bio *bio)
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+   if (bio && (bio->bi_opf & REQ_NOWAIT))
+   bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
 
@@ -1662,6 +1664,8 @@ static blk_qc_t blk_sq_make_request(struct request_queue 
*q, struct bio *bio)
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+   if (bio && (bio->bi_opf & REQ_NOWAIT))
+   bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1d4c9f8bc1e1..7481124c5025 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -731,6 +731,9 @@ static int virtblk_probe(struct virtio_device *vdev)
/* No real sector limit. */
blk_queue_max_hw_sectors(q, -1U);
 
+   /* Request queue supports BIO_NOWAIT */
+

Re: [PATCH 5/8] nowait aio: return on congested block device

2017-04-04 Thread Christoph Hellwig
Please make this a REQ_* flag so that it can be passed in the bio,
the request and as an argument to the get_request functions instead
of testing for a bio.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/8] nowait aio: return on congested block device

2017-04-03 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

A new flag BIO_NOWAIT is introduced to identify bio's
orignating from iocb with IOCB_NOWAIT. This flag indicates
to return immediately if a request cannot be made instead
of retrying.

To facilitate this, QUEUE_FLAG_NOWAIT is set to devices
which support this. While currently this is set to
virtio and sd only. Support to more devices will be added soon.

Signed-off-by: Goldwyn Rodrigues 
---
 block/blk-core.c   | 24 ++--
 block/blk-mq-sched.c   |  3 +++
 block/blk-mq.c |  4 
 drivers/block/virtio_blk.c |  3 +++
 drivers/scsi/sd.c  |  3 +++
 fs/direct-io.c | 11 +--
 include/linux/bio.h|  6 ++
 include/linux/blk_types.h  |  1 +
 include/linux/blkdev.h |  2 ++
 9 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d772c22..95a9b18 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1232,6 +1232,11 @@ static struct request *get_request(struct request_queue 
*q, unsigned int op,
if (!IS_ERR(rq))
return rq;
 
+   if (bio && bio_flagged(bio, BIO_NOWAIT)) {
+   blk_put_rl(rl);
+   return ERR_PTR(-EAGAIN);
+   }
+
if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) 
{
blk_put_rl(rl);
return rq;
@@ -1870,6 +1875,18 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
 
+   if (bio_flagged(bio, BIO_NOWAIT)) {
+   if (!blk_queue_nowait(q)) {
+   err = -EOPNOTSUPP;
+   goto end_io;
+   }
+   if (!(bio->bi_opf & (REQ_SYNC | REQ_IDLE))) {
+   err = -EINVAL;
+   goto end_io;
+   }
+   }
+
+
part = bio->bi_bdev->bd_part;
if (should_fail_request(part, bio->bi_iter.bi_size) ||
should_fail_request(_to_disk(part)->part0,
@@ -2021,7 +2038,7 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-   if (likely(blk_queue_enter(q, false) == 0)) {
+   if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT)) == 
0)) {
struct bio_list lower, same;
 
/* Create a fresh bio_list for all subordinate requests 
*/
@@ -2046,7 +2063,10 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_merge(_list_on_stack[0], );
bio_list_merge(_list_on_stack[0], 
_list_on_stack[1]);
} else {
-   bio_io_error(bio);
+   if (unlikely(!blk_queue_dying(q) && bio_flagged(bio, 
BIO_NOWAIT)))
+   bio_wouldblock_error(bio);
+   else
+   bio_io_error(bio);
}
bio = bio_list_pop(_list_on_stack[0]);
} while (bio);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 09af8ff..40e78b5 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -119,6 +119,9 @@ struct request *blk_mq_sched_get_request(struct 
request_queue *q,
if (likely(!data->hctx))
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
 
+   if (likely(bio) && bio_flagged(bio, BIO_NOWAIT))
+   data->flags |= BLK_MQ_REQ_NOWAIT;
+
if (e) {
data->flags |= BLK_MQ_REQ_INTERNAL;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6b6e7bc..2d90b12 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1511,6 +1511,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue 
*q, struct bio *bio)
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+   if (bio && bio_flagged(bio, BIO_NOWAIT))
+   bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
 
@@ -1635,6 +1637,8 @@ static blk_qc_t blk_sq_make_request(struct request_queue 
*q, struct bio *bio)
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+   if (bio && bio_flagged(bio, BIO_NOWAIT))
+   bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1d4c9f8..7481124 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -731,6 +731,9 @@ static int virtblk_probe(struct virtio_device *vdev)
/* No real sector limit. */
blk_queue_max_hw_sectors(q, -1U);
 
+   /* Request queue supports BIO_NOWAIT */
+   queue_flag_set_unlocked(QUEUE_FLAG_NOWAIT, q);
+
/* Host can optionally specify maximum segment 

Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-24 Thread Jens Axboe
On 03/24/2017 05:32 AM, Goldwyn Rodrigues wrote:
> 
> 
> On 03/16/2017 09:33 AM, Jens Axboe wrote:
>> On 03/15/2017 03:51 PM, Goldwyn Rodrigues wrote:
>>> diff --git a/block/blk-core.c b/block/blk-core.c
>>> index 0eeb99e..2e5cba2 100644
>>> --- a/block/blk-core.c
>>> +++ b/block/blk-core.c
>>> @@ -2014,7 +2019,7 @@ blk_qc_t generic_make_request(struct bio *bio)
>>> do {
>>> struct request_queue *q = bdev_get_queue(bio->bi_bdev);
>>>  
>>> -   if (likely(blk_queue_enter(q, false) == 0)) {
>>> +   if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT)) == 
>>> 0)) {
>>> struct bio_list hold;
>>> struct bio_list lower, same;
>>>  
>>> @@ -2040,7 +2045,10 @@ blk_qc_t generic_make_request(struct bio *bio)
>>> bio_list_merge(_list_on_stack, );
>>> bio_list_merge(_list_on_stack, );
>>> } else {
>>> -   bio_io_error(bio);
>>> +   if (unlikely(bio_flagged(bio, BIO_NOWAIT)))
>>> +   bio_wouldblock_error(bio);
>>> +   else
>>> +   bio_io_error(bio);
>>
>> This doesn't look right. What if the queue is dying, and BIO_NOWAIT just
>> happened to be set?
>>
> 
> Yes, I need to add a condition here to check for blk_queue_dying(). Thanks.
> 
>> And you're missing wbt_wait() as well as a blocking point. Ditto in
>> blk-mq.
> 
> wbt_wait() does not apply to WRITE_ODIRECT

It doesn't _currently_ block for it, but that could change. It should
have an explicit check.

Additionally, the more weird assumptions you make, the more half assed
this will be. The other assumption made here is that only direct IO
would set nonblock. We have to assume that any IO can have NONBLOCK set,
and not make any assumptions about the caller.

-- 
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-24 Thread Goldwyn Rodrigues


On 03/16/2017 09:33 AM, Jens Axboe wrote:
> On 03/15/2017 03:51 PM, Goldwyn Rodrigues wrote:
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 0eeb99e..2e5cba2 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -2014,7 +2019,7 @@ blk_qc_t generic_make_request(struct bio *bio)
>>  do {
>>  struct request_queue *q = bdev_get_queue(bio->bi_bdev);
>>  
>> -if (likely(blk_queue_enter(q, false) == 0)) {
>> +if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT)) == 
>> 0)) {
>>  struct bio_list hold;
>>  struct bio_list lower, same;
>>  
>> @@ -2040,7 +2045,10 @@ blk_qc_t generic_make_request(struct bio *bio)
>>  bio_list_merge(_list_on_stack, );
>>  bio_list_merge(_list_on_stack, );
>>  } else {
>> -bio_io_error(bio);
>> +if (unlikely(bio_flagged(bio, BIO_NOWAIT)))
>> +bio_wouldblock_error(bio);
>> +else
>> +bio_io_error(bio);
> 
> This doesn't look right. What if the queue is dying, and BIO_NOWAIT just
> happened to be set?
> 

Yes, I need to add a condition here to check for blk_queue_dying(). Thanks.

> And you're missing wbt_wait() as well as a blocking point. Ditto in
> blk-mq.

wbt_wait() does not apply to WRITE_ODIRECT


> 
>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>> index 159187a..942ce8c 100644
>> --- a/block/blk-mq.c
>> +++ b/block/blk-mq.c
>> @@ -1518,6 +1518,8 @@ static blk_qc_t blk_mq_make_request(struct 
>> request_queue *q, struct bio *bio)
>>  rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
>>  if (unlikely(!rq)) {
>>  __wbt_done(q->rq_wb, wb_acct);
>> +if (bio && bio_flagged(bio, BIO_NOWAIT))
>> +bio_wouldblock_error(bio);
>>  return BLK_QC_T_NONE;
>>  }
>>  
> 
> This seems a little fragile now, since not both paths free the bio.
> 

Direct I/O should free the bios in bio_dio_complete(). I am not sure why
it would not free bio here originally, but IIRC, this path is for
bio==NULL only. So, with this patch we would get a rq==NULL here and
hence the bio_wouldblock_error() call.

-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-20 Thread Jan Kara
On Fri 17-03-17 07:23:24, Goldwyn Rodrigues wrote:
> On 03/16/2017 04:31 PM, Dave Chinner wrote:
> > On Wed, Mar 15, 2017 at 04:51:04PM -0500, Goldwyn Rodrigues wrote:
> >> From: Goldwyn Rodrigues 
> >>
> >> A new flag BIO_NOWAIT is introduced to identify bio's
> >> orignating from iocb with IOCB_NOWAIT. This flag indicates
> >> to return immediately if a request cannot be made instead
> >> of retrying.
> > 
> > So this makes a congested block device run the bio IO completion
> > callback with an -EAGAIN error present? Are all the filesystem
> > direct IO submission and completion routines OK with that? i.e. does
> > such a congestion case cause filesystems to temporarily expose stale
> > data to unprivileged users when the IO is requeued in this way?
> > 
> > e.g. filesystem does allocation without blocking, submits bio,
> > device is congested, runs IO completion with error, so nothing
> > written to allocated blocks, write gets queued, so other read
> > comes in while the write is queued, reads data from uninitialised
> > blocks that were allocated during the write
> > 
> > Seems kinda problematic to me to have a undocumented design
> > constraint (i.e a landmine) where we submit the AIO only to have it
> > error out and then expect the filesystem to do something special and
> > different /without blocking/ on EAGAIN.
> 
> If the filesystems has to perform block allocation, we would return
> -EAGAIN early enough. However, I agree there is a problem, since not all
> filesystems know this. I worked on only three of them.

So Dave has a good point about the missing documentation explaining the
expectation from the filesystem. Other than that the filesystem is
responsible for determining in its direct IO submission routine whether it
can deal with NOWAIT direct IO without blocking (including backing out of
block layer refuses to do the IO) and if not, just return with EAGAIN right
away.

Probably we should also have a feature flag in filesystem_type telling
whether a particular filesystem knows about NOWAIT direct IO at all so that
we don't have to add stub to each and every filesystem supporting direct IO
that returns EAGAIN when NOWAIT is set (OTOH adding that stub won't be that
hard either since there are not *that* many filesystems supporting direct
IO). But one or the other has to be done.

> > Why isn't the congestion check at a higher layer like we do for page
> > cache readahead? i.e. using the bdi*congested() API at the time we
> > are doing all the other filesystem blocking checks.
> > 
> 
> Yes, that may work better. We will have to call bdi_read_congested() on
> a write path. (will have to comment that part of the code). Would it
> encompass all possible waits in the block layer?

Congestion mechanism is not really reliable. The bdi can show the device is
not congested but block layer will still block you waiting for some
resource. And for some of the stuff like tag allocation you cannot really
do a reliable check in advance so I don't think congestion checks are
really a viable alternative.

Honza
-- 
Jan Kara 
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-17 Thread Goldwyn Rodrigues


On 03/16/2017 04:31 PM, Dave Chinner wrote:
> On Wed, Mar 15, 2017 at 04:51:04PM -0500, Goldwyn Rodrigues wrote:
>> From: Goldwyn Rodrigues 
>>
>> A new flag BIO_NOWAIT is introduced to identify bio's
>> orignating from iocb with IOCB_NOWAIT. This flag indicates
>> to return immediately if a request cannot be made instead
>> of retrying.
> 
> So this makes a congested block device run the bio IO completion
> callback with an -EAGAIN error present? Are all the filesystem
> direct IO submission and completion routines OK with that? i.e. does
> such a congestion case cause filesystems to temporarily expose stale
> data to unprivileged users when the IO is requeued in this way?
> 
> e.g. filesystem does allocation without blocking, submits bio,
> device is congested, runs IO completion with error, so nothing
> written to allocated blocks, write gets queued, so other read
> comes in while the write is queued, reads data from uninitialised
> blocks that were allocated during the write
> 
> Seems kinda problematic to me to have a undocumented design
> constraint (i.e a landmine) where we submit the AIO only to have it
> error out and then expect the filesystem to do something special and
> different /without blocking/ on EAGAIN.


If the filesystems has to perform block allocation, we would return
-EAGAIN early enough. However, I agree there is a problem, since not all
filesystems know this. I worked on only three of them.

> 
> Why isn't the congestion check at a higher layer like we do for page
> cache readahead? i.e. using the bdi*congested() API at the time we
> are doing all the other filesystem blocking checks.
> 

Yes, that may work better. We will have to call bdi_read_congested() on
a write path. (will have to comment that part of the code). Would it
encompass all possible waits in the block layer?


-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-17 Thread Goldwyn Rodrigues


On 03/16/2017 09:33 AM, Jens Axboe wrote:
> On 03/15/2017 03:51 PM, Goldwyn Rodrigues wrote:
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 0eeb99e..2e5cba2 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -2014,7 +2019,7 @@ blk_qc_t generic_make_request(struct bio *bio)
>>  do {
>>  struct request_queue *q = bdev_get_queue(bio->bi_bdev);
>>  
>> -if (likely(blk_queue_enter(q, false) == 0)) {
>> +if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT)) == 
>> 0)) {
>>  struct bio_list hold;
>>  struct bio_list lower, same;
>>  
>> @@ -2040,7 +2045,10 @@ blk_qc_t generic_make_request(struct bio *bio)
>>  bio_list_merge(_list_on_stack, );
>>  bio_list_merge(_list_on_stack, );
>>  } else {
>> -bio_io_error(bio);
>> +if (unlikely(bio_flagged(bio, BIO_NOWAIT)))
>> +bio_wouldblock_error(bio);
>> +else
>> +bio_io_error(bio);
> 
> This doesn't look right. What if the queue is dying, and BIO_NOWAIT just
> happened to be set?

Yes, need a check for that.
> 
> And you're missing wbt_wait() as well as a blocking point. Ditto in
> blk-mq.

Agree.

> 
>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>> index 159187a..942ce8c 100644
>> --- a/block/blk-mq.c
>> +++ b/block/blk-mq.c
>> @@ -1518,6 +1518,8 @@ static blk_qc_t blk_mq_make_request(struct 
>> request_queue *q, struct bio *bio)
>>  rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
>>  if (unlikely(!rq)) {
>>  __wbt_done(q->rq_wb, wb_acct);
>> +if (bio && bio_flagged(bio, BIO_NOWAIT))
>> +bio_wouldblock_error(bio);
>>  return BLK_QC_T_NONE;
>>  }
>>  
> 
> This seems a little fragile now, since not both paths free the bio.
> 

bio_put() is handled in dio_bio_complete(). However, I am not sure why
there is no bio_endio() call for !rq. IIRC, it was not meant to be.

-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-16 Thread Ming Lei
On Thu, Mar 16, 2017 at 10:33 PM, Jens Axboe  wrote:
> On 03/15/2017 03:51 PM, Goldwyn Rodrigues wrote:
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 0eeb99e..2e5cba2 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -2014,7 +2019,7 @@ blk_qc_t generic_make_request(struct bio *bio)
>>   do {
>>   struct request_queue *q = bdev_get_queue(bio->bi_bdev);
>>
>> - if (likely(blk_queue_enter(q, false) == 0)) {
>> + if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT)) == 
>> 0)) {
>>   struct bio_list hold;
>>   struct bio_list lower, same;
>>
>> @@ -2040,7 +2045,10 @@ blk_qc_t generic_make_request(struct bio *bio)
>>   bio_list_merge(_list_on_stack, );
>>   bio_list_merge(_list_on_stack, );
>>   } else {
>> - bio_io_error(bio);
>> + if (unlikely(bio_flagged(bio, BIO_NOWAIT)))
>> + bio_wouldblock_error(bio);
>> + else
>> + bio_io_error(bio);
>
> This doesn't look right. What if the queue is dying, and BIO_NOWAIT just
> happened to be set?
>
> And you're missing wbt_wait() as well as a blocking point. Ditto in
> blk-mq.

There are also tons of block points in dm/md/bcache, which need to be
considered or be documented at least, :-)



Thanks,
Ming Lei
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-16 Thread Dave Chinner
On Wed, Mar 15, 2017 at 04:51:04PM -0500, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues 
> 
> A new flag BIO_NOWAIT is introduced to identify bio's
> orignating from iocb with IOCB_NOWAIT. This flag indicates
> to return immediately if a request cannot be made instead
> of retrying.

So this makes a congested block device run the bio IO completion
callback with an -EAGAIN error present? Are all the filesystem
direct IO submission and completion routines OK with that? i.e. does
such a congestion case cause filesystems to temporarily expose stale
data to unprivileged users when the IO is requeued in this way?

e.g. filesystem does allocation without blocking, submits bio,
device is congested, runs IO completion with error, so nothing
written to allocated blocks, write gets queued, so other read
comes in while the write is queued, reads data from uninitialised
blocks that were allocated during the write

Seems kinda problematic to me to have a undocumented design
constraint (i.e a landmine) where we submit the AIO only to have it
error out and then expect the filesystem to do something special and
different /without blocking/ on EAGAIN.

Why isn't the congestion check at a higher layer like we do for page
cache readahead? i.e. using the bdi*congested() API at the time we
are doing all the other filesystem blocking checks.

-Dave.
-- 
Dave Chinner
da...@fromorbit.com
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-16 Thread Jens Axboe
On 03/15/2017 03:51 PM, Goldwyn Rodrigues wrote:
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 0eeb99e..2e5cba2 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -2014,7 +2019,7 @@ blk_qc_t generic_make_request(struct bio *bio)
>   do {
>   struct request_queue *q = bdev_get_queue(bio->bi_bdev);
>  
> - if (likely(blk_queue_enter(q, false) == 0)) {
> + if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT)) == 
> 0)) {
>   struct bio_list hold;
>   struct bio_list lower, same;
>  
> @@ -2040,7 +2045,10 @@ blk_qc_t generic_make_request(struct bio *bio)
>   bio_list_merge(_list_on_stack, );
>   bio_list_merge(_list_on_stack, );
>   } else {
> - bio_io_error(bio);
> + if (unlikely(bio_flagged(bio, BIO_NOWAIT)))
> + bio_wouldblock_error(bio);
> + else
> + bio_io_error(bio);

This doesn't look right. What if the queue is dying, and BIO_NOWAIT just
happened to be set?

And you're missing wbt_wait() as well as a blocking point. Ditto in
blk-mq.

> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 159187a..942ce8c 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1518,6 +1518,8 @@ static blk_qc_t blk_mq_make_request(struct 
> request_queue *q, struct bio *bio)
>   rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
>   if (unlikely(!rq)) {
>   __wbt_done(q->rq_wb, wb_acct);
> + if (bio && bio_flagged(bio, BIO_NOWAIT))
> + bio_wouldblock_error(bio);
>   return BLK_QC_T_NONE;
>   }
>  

This seems a little fragile now, since not both paths free the bio.

-- 
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/8] nowait aio: return on congested block device

2017-03-15 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

A new flag BIO_NOWAIT is introduced to identify bio's
orignating from iocb with IOCB_NOWAIT. This flag indicates
to return immediately if a request cannot be made instead
of retrying.

Signed-off-by: Goldwyn Rodrigues 
---
 block/blk-core.c  | 12 ++--
 block/blk-mq-sched.c  |  3 +++
 block/blk-mq.c|  4 
 fs/direct-io.c| 11 +--
 include/linux/bio.h   |  6 ++
 include/linux/blk_types.h |  1 +
 6 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 0eeb99e..2e5cba2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1232,6 +1232,11 @@ static struct request *get_request(struct request_queue 
*q, unsigned int op,
if (!IS_ERR(rq))
return rq;
 
+   if (bio && bio_flagged(bio, BIO_NOWAIT)) {
+   blk_put_rl(rl);
+   return ERR_PTR(-EAGAIN);
+   }
+
if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) 
{
blk_put_rl(rl);
return rq;
@@ -2014,7 +2019,7 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-   if (likely(blk_queue_enter(q, false) == 0)) {
+   if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT)) == 
0)) {
struct bio_list hold;
struct bio_list lower, same;
 
@@ -2040,7 +2045,10 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_merge(_list_on_stack, );
bio_list_merge(_list_on_stack, );
} else {
-   bio_io_error(bio);
+   if (unlikely(bio_flagged(bio, BIO_NOWAIT)))
+   bio_wouldblock_error(bio);
+   else
+   bio_io_error(bio);
}
bio = bio_list_pop(current->bio_list);
} while (bio);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 09af8ff..40e78b5 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -119,6 +119,9 @@ struct request *blk_mq_sched_get_request(struct 
request_queue *q,
if (likely(!data->hctx))
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
 
+   if (likely(bio) && bio_flagged(bio, BIO_NOWAIT))
+   data->flags |= BLK_MQ_REQ_NOWAIT;
+
if (e) {
data->flags |= BLK_MQ_REQ_INTERNAL;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 159187a..942ce8c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1518,6 +1518,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue 
*q, struct bio *bio)
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+   if (bio && bio_flagged(bio, BIO_NOWAIT))
+   bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
 
@@ -1642,6 +1644,8 @@ static blk_qc_t blk_sq_make_request(struct request_queue 
*q, struct bio *bio)
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, );
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+   if (bio && bio_flagged(bio, BIO_NOWAIT))
+   bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a04ebea..f6835d3 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -386,6 +386,9 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
else
bio->bi_end_io = dio_bio_end_io;
 
+   if (dio->iocb->ki_flags & IOCB_NOWAIT)
+   bio_set_flag(bio, BIO_NOWAIT);
+
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
 }
@@ -480,8 +483,12 @@ static int dio_bio_complete(struct dio *dio, struct bio 
*bio)
unsigned i;
int err;
 
-   if (bio->bi_error)
-   dio->io_error = -EIO;
+   if (bio->bi_error) {
+   if (bio_flagged(bio, BIO_NOWAIT))
+   dio->io_error = -EAGAIN;
+   else
+   dio->io_error = -EIO;
+   }
 
if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
err = bio->bi_error;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 8e52119..1a92707 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -425,6 +425,12 @@ static inline void bio_io_error(struct bio *bio)
bio_endio(bio);
 }
 
+static inline void bio_wouldblock_error(struct bio *bio)
+{
+   bio->bi_error = -EAGAIN;
+   bio_endio(bio);
+}
+
 struct request_queue;
 extern int bio_phys_segments(struct request_queue *, struct bio *);
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 

Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-08 Thread Goldwyn Rodrigues


On 03/08/2017 10:17 AM, Jens Axboe wrote:
> On 03/08/2017 08:00 AM, Goldwyn Rodrigues wrote:
>>
>>
>> On 03/08/2017 01:03 AM, Sagi Grimberg wrote:
>>>
 -if (likely(blk_queue_enter(q, false) == 0)) {
 +if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT))
 == 0)) {
  ret = q->make_request_fn(q, bio);
>>>
>>> I think that for ->make_request to not block we'd need to set
>>> BLK_MQ_REQ_NOWAIT in blk_mq_alloc_data to avoid blocking on a tag
>>> allocation.
>>>
>>> Something like the untested addition below:
>>
>> I did that in the first series, but there are too many reasons to block
>> in blk-mq [1]. I dropped blk-mq work in v2.
> 
> That's complete nonsense, there are no more places in blk-mq that will
> block that in the legacy path. Most of the examples from your URL:
> 
>> [1] https://patchwork.kernel.org/patch/9571051/
> 
> are not blk-mq, but writeback throttling, and drivers that explicitly
> hook into ->make_request_fn.
> 
> As others have mentioned, it's a total non-starter to focus on the
> deprecated IO path and just ignore the new one. Back to the drawing
> board.
> 

Thanks, I understood what I was doing wrong. I will include blk-mq in
the patch.

-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-08 Thread Jens Axboe
On 03/08/2017 08:00 AM, Goldwyn Rodrigues wrote:
> 
> 
> On 03/08/2017 01:03 AM, Sagi Grimberg wrote:
>>
>>> -if (likely(blk_queue_enter(q, false) == 0)) {
>>> +if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT))
>>> == 0)) {
>>>  ret = q->make_request_fn(q, bio);
>>
>> I think that for ->make_request to not block we'd need to set
>> BLK_MQ_REQ_NOWAIT in blk_mq_alloc_data to avoid blocking on a tag
>> allocation.
>>
>> Something like the untested addition below:
> 
> I did that in the first series, but there are too many reasons to block
> in blk-mq [1]. I dropped blk-mq work in v2.

That's complete nonsense, there are no more places in blk-mq that will
block that in the legacy path. Most of the examples from your URL:

> [1] https://patchwork.kernel.org/patch/9571051/

are not blk-mq, but writeback throttling, and drivers that explicitly
hook into ->make_request_fn.

As others have mentioned, it's a total non-starter to focus on the
deprecated IO path and just ignore the new one. Back to the drawing
board.

-- 
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-08 Thread Christoph Hellwig
On Wed, Mar 08, 2017 at 04:28:06PM +0100, Jan Kara wrote:
> Well, that's not really good. If we cannot support this for both blk-mq and
> legacy block layer the feature will not be usable. So please work on blk-mq
> support as well.

Exactly.  In addition to that anything implementing a feature for the
legacy request request code but not blk-mq is grounds for an instant
NAK.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-08 Thread Jan Kara
On Wed 08-03-17 09:00:09, Goldwyn Rodrigues wrote:
> 
> 
> On 03/08/2017 01:03 AM, Sagi Grimberg wrote:
> > 
> >> -if (likely(blk_queue_enter(q, false) == 0)) {
> >> +if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT))
> >> == 0)) {
> >>  ret = q->make_request_fn(q, bio);
> > 
> > I think that for ->make_request to not block we'd need to set
> > BLK_MQ_REQ_NOWAIT in blk_mq_alloc_data to avoid blocking on a tag
> > allocation.
> > 
> > Something like the untested addition below:
> 
> I did that in the first series, but there are too many reasons to block
> in blk-mq [1]. I dropped blk-mq work in v2.
> 
> [1] https://patchwork.kernel.org/patch/9571051/

Well, that's not really good. If we cannot support this for both blk-mq and
legacy block layer the feature will not be usable. So please work on blk-mq
support as well.

Honza 

-- 
Jan Kara 
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-08 Thread Goldwyn Rodrigues


On 03/08/2017 01:03 AM, Sagi Grimberg wrote:
> 
>> -if (likely(blk_queue_enter(q, false) == 0)) {
>> +if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT))
>> == 0)) {
>>  ret = q->make_request_fn(q, bio);
> 
> I think that for ->make_request to not block we'd need to set
> BLK_MQ_REQ_NOWAIT in blk_mq_alloc_data to avoid blocking on a tag
> allocation.
> 
> Something like the untested addition below:

I did that in the first series, but there are too many reasons to block
in blk-mq [1]. I dropped blk-mq work in v2.

[1] https://patchwork.kernel.org/patch/9571051/

-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] nowait aio: return on congested block device

2017-03-07 Thread Sagi Grimberg



-   if (likely(blk_queue_enter(q, false) == 0)) {
+   if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT)) == 
0)) {
ret = q->make_request_fn(q, bio);


I think that for ->make_request to not block we'd need to set
BLK_MQ_REQ_NOWAIT in blk_mq_alloc_data to avoid blocking on a tag
allocation.

Something like the untested addition below:
--
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 09af8ff18719..40e78b57fb44 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -119,6 +119,9 @@ struct request *blk_mq_sched_get_request(struct 
request_queue *q,

if (likely(!data->hctx))
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);

+   if (likely(bio) && bio_flagged(bio, BIO_NOWAIT))
+   data->flags |= BLK_MQ_REQ_NOWAIT;
+
if (e) {
data->flags |= BLK_MQ_REQ_INTERNAL;
--
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/8] nowait aio: return on congested block device

2017-02-28 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

A new flag BIO_NOWAIT is introduced to identify bio's
orignating from iocb with IOCB_NOWAIT. This flag indicates
to return immediately if a request cannot be made instead
of retrying.

Signed-off-by: Goldwyn Rodrigues 
---
 block/blk-core.c  | 13 +++--
 fs/direct-io.c| 11 +--
 include/linux/blk_types.h |  1 +
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 61ba08c..e5cfc50 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1258,6 +1258,11 @@ static struct request *get_request(struct request_queue 
*q, unsigned int op,
if (!IS_ERR(rq))
return rq;
 
+   if (bio_flagged(bio, BIO_NOWAIT)) {
+   blk_put_rl(rl);
+   return ERR_PTR(-EAGAIN);
+   }
+
if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) 
{
blk_put_rl(rl);
return rq;
@@ -2018,7 +2023,7 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-   if (likely(blk_queue_enter(q, false) == 0)) {
+   if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT)) == 
0)) {
ret = q->make_request_fn(q, bio);
 
blk_queue_exit(q);
@@ -2027,7 +2032,11 @@ blk_qc_t generic_make_request(struct bio *bio)
} else {
struct bio *bio_next = bio_list_pop(current->bio_list);
 
-   bio_io_error(bio);
+   if (unlikely(bio_flagged(bio, BIO_NOWAIT))) {
+   bio->bi_error = -EAGAIN;
+   bio_endio(bio);
+   } else
+   bio_io_error(bio);
bio = bio_next;
}
} while (bio);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index c87bae4..2973df0 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -386,6 +386,9 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
else
bio->bi_end_io = dio_bio_end_io;
 
+   if (dio->iocb->ki_flags & IOCB_NOWAIT)
+   bio_set_flag(bio, BIO_NOWAIT);
+
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
 }
@@ -480,8 +483,12 @@ static int dio_bio_complete(struct dio *dio, struct bio 
*bio)
unsigned i;
int err;
 
-   if (bio->bi_error)
-   dio->io_error = -EIO;
+   if (bio->bi_error) {
+   if (bio_flagged(bio, BIO_NOWAIT))
+   dio->io_error = bio->bi_error;
+   else
+   dio->io_error = -EIO;
+   }
 
if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
err = bio->bi_error;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 519ea2c..1d77e9b 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -102,6 +102,7 @@ struct bio {
 #define BIO_REFFED 8   /* bio has elevated ->bi_cnt */
 #define BIO_THROTTLED  9   /* This bio has already been subjected to
 * throttling rules. Don't do it again. */
+#define BIO_NOWAIT 10  /* don't block over blk device congestion */
 
 /*
  * Flags starting here get preserved by bio_reset() - this includes
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html