On Wed, 2018-02-07 at 12:07 -0800, t...@kernel.org wrote:
> Ah, you're right.  u64_stat_sync doesn't imply barriers, so we want
> something like the following.
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index df93102..d6edf3b 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -593,7 +593,7 @@ static void blk_mq_rq_update_aborted_gstate(struct 
> request *rq, u64 gstate)
>        */
>       local_irq_save(flags);
>       u64_stats_update_begin(&rq->aborted_gstate_sync);
> -     rq->aborted_gstate = gstate;
> +     smp_store_release(&rq->aborted_gstate, gstate);
>       u64_stats_update_end(&rq->aborted_gstate_sync);
>       local_irq_restore(flags);
>  }
> @@ -605,7 +605,7 @@ static u64 blk_mq_rq_aborted_gstate(struct request *rq)
>  
>       do {
>               start = u64_stats_fetch_begin(&rq->aborted_gstate_sync);
> -             aborted_gstate = rq->aborted_gstate;
> +             aborted_gstate = smp_load_acquire(&rq->aborted_gstate);
>       } while (u64_stats_fetch_retry(&rq->aborted_gstate_sync, start));
>  
>       return aborted_gstate;
> @@ -836,8 +836,8 @@ static void blk_mq_rq_timed_out(struct request *req, bool 
> reserved)
>                * ->aborted_gstate is set, this may lead to ignored
>                * completions and further spurious timeouts.
>                */
> -             blk_mq_rq_update_aborted_gstate(req, 0);
>               blk_add_timer(req);
> +             blk_mq_rq_update_aborted_gstate(req, 0);
>               break;
>       case BLK_EH_NOT_HANDLED:
>               break;

Hello Tejun,

With this patch applied I see requests for which it seems like the timeout 
handler
did not get invoked:

sdc/hctx0/busy:0000000095e04b7c {.op=WRITE, .cmd_flags=FAILFAST_TRANSPORT|SYNC|N
OMERGE|IDLE, .rq_flags=MQ_INFLIGHT|DONTPREP|IO_STAT|MQ_TIMEOUT_EXPIRED, .state=i
n_flight, .gstate=0xed/0xed, .tag=26, .internal_tag=-1, .cmd=Write(10) 2a 00 00 
 
00 60 ba 00 00 08 00, .retries=0, .result = 0x50000, .flags=TAGGED|INITIALIZED, 
 
.timeout=1.000, allocated 1093.180 s ago}

sdc/hctx0/busy:0000000065a64e9b {.op=WRITE, .cmd_flags=FAILFAST_TRANSPORT|SYNC|N
OMERGE|IDLE, .rq_flags=MQ_INFLIGHT|DONTPREP|IO_STAT|MQ_TIMEOUT_EXPIRED, .state=i
n_flight, .gstate=0x5/0x5, .tag=27, .internal_tag=-1, .cmd=Write(10) 2a 00 00 00
 62 d2 00 00 08 00, .retries=0, .result = 0x50000, .flags=TAGGED|INITIALIZED, .t
imeout=1.000, allocated 1093.180 s ago}

[ ... ]

sdc/hctx3/busy:00000000479cc2a9 {.op=WRITE, .cmd_flags=FAILFAST_TRANSPORT|SYNC|N
OMERGE|IDLE, .rq_flags=MQ_INFLIGHT|DONTPREP|IO_STAT, .state=in_flight, .gstate=0
x11/0x11, .tag=57, .internal_tag=-1, .cmd=Write(10) 2a 00 00 00 61 d2 00 00 08 0
0, .retries=0, .result = 0x0, .flags=TAGGED|INITIALIZED, .timeout=1.000, allocat
ed 1093.150 s ago}

sdc/hctx3/busy:000000008fd130d5 {.op=WRITE, .cmd_flags=FAILFAST_TRANSPORT|SYNC|N
OMERGE|IDLE, .rq_flags=MQ_INFLIGHT|DONTPREP|IO_STAT, .state=in_flight, .gstate=0
xd/0xd, .tag=61, .internal_tag=-1, .cmd=Write(10) 2a 00 00 00 c3 94 00 00 08 00,
 .retries=0, .result = 0x0, .flags=TAGGED|INITIALIZED, .timeout=1.000, allocated
 1093.140 s ago}

As one can see for some requests MQ_TIMEOUT_EXPIRED is set and .result = 
0x50000.
The value of .result means that the SCSI error handler has submitted an abort 
(see
also scmnd->result = DID_ABORT << 16 in drivers/infiniband/ulp/srp/ib_srp.c). 
For
the last two requests shown above however MQ_TIMEOUT_EXPIRED is not set and the
SCSI result has value 0. I think that means that it can happen that a request
times out but that the timeout handler does not get invoked ...

Thanks,

Bart.



Reply via email to