On Wed, Jul 25, 2018 at 02:29:30PM -0400, Peter Geis wrote:
> On 07/25/2018 02:17 PM, Jens Axboe wrote:
> > On 7/25/18 10:28 AM, Peter Geis wrote:
> > > Good Afternoon,
> > >
> > > I have encountered an issue on both Tegra 2 and Tegra 3 devices
> > > accessing emmc following the 25 July 2018 remote tracking merge.
> > >
> > > The offending commit is:
> > > 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8
> > > blk-mq: issue directly if hw queue isn't busy in case of 'none'.
> >
> > Can you try my current for-next? This should fix it:
> >
> > commit 8824f62246bef288173a6624a363352f0d4d3b09
> > Author: Ming Lei <[email protected]>
> > Date: Sun Jul 22 14:10:15 2018 +0800
> >
> > blk-mq: fail the request in case issue failure
> >
>
> That commit made the current merge window, it must be reverted before
> reverting the offending commit.
>
> With that patch, the bug triggers then the kernel waits for the mmc to
> recover. It seems however that the bug leaves the mmc in a zombie state,
> where it is processing the previous command but the kernel has no control
> over it.
>
> [ 4.233073] mmc0: Got command interrupt 0x00000001 even though no command
> operation was in progress.
> [ 4.242189] mmc0: sdhci: ============ SDHCI REGISTER DUMP ===========
> [ 4.248616] mmc0: sdhci: Sys addr: 0x00000000 | Version: 0x00000001
> [ 4.255041] mmc0: sdhci: Blk size: 0x00007200 | Blk cnt: 0x00000000
> [ 4.261465] mmc0: sdhci: Argument: 0x002e3b10 | Trn mode: 0x00000033
> [ 4.267890] mmc0: sdhci: Present: 0x1ff70000 | Host ctl: 0x00000031
> [ 4.274314] mmc0: sdhci: Power: 0x0000000f | Blk gap: 0x00000000
> [ 4.280737] mmc0: sdhci: Wake-up: 0x00000000 | Clock: 0x00000007
> [ 4.287162] mmc0: sdhci: Timeout: 0x0000000e | Int stat: 0x00000002
> [ 4.293586] mmc0: sdhci: Int enab: 0x02ff000b | Sig enab: 0x02fc000b
> [ 4.300010] mmc0: sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000
> [ 4.306433] mmc0: sdhci: Caps: 0xe7ffd080 | Caps_1: 0x00000074
> [ 4.312857] mmc0: sdhci: Cmd: 0x0000123a | Max curr: 0x00969696
> [ 4.319281] mmc0: sdhci: Resp[0]: 0x00000900 | Resp[1]: 0x04800e92
> [ 4.325705] mmc0: sdhci: Resp[2]: 0x074b8000 | Resp[3]: 0x00000240
> [ 4.332128] mmc0: sdhci: Host ctl2: 0x00000000
> [ 4.336560] mmc0: sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0xae2f9220
> [ 4.342981] mmc0: sdhci: ============================================
Not found issue in the two merged patches, and it might be one mmc
internal issue, could you apply the following debug patch and post
the console log?
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index cf9c66c6d35a..6573684b07e5 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -415,6 +415,8 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
blk_mq_try_issue_list_directly(hctx, list);
if (list_empty(list))
return;
+ if (blk_queue_debug(q))
+ trace_printk("issue direct: partial done\n");
}
blk_mq_insert_requests(hctx, ctx, list);
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e13bdc2707ce..efb898323e2d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -488,6 +488,9 @@ void blk_mq_free_request(struct request *rq)
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+ if (blk_queue_debug(q))
+ trace_printk("complete: rq %d\n", rq->internal_tag);
+
if (rq->rq_flags & RQF_ELVPRIV) {
if (e && e->type->ops.mq.finish_request)
e->type->ops.mq.finish_request(rq);
@@ -1793,6 +1796,9 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx
*hctx,
list_del_init(&rq->queuelist);
ret = blk_mq_request_issue_directly(rq);
+ if (blk_queue_debug(rq->q))
+ trace_printk("issue direct: rq %d, ret %d\n",
+ rq->internal_tag, (int)ret);
if (ret != BLK_STS_OK) {
if (ret == BLK_STS_RESOURCE ||
ret == BLK_STS_DEV_RESOURCE) {
@@ -1841,6 +1847,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue
*q, struct bio *bio)
return BLK_QC_T_NONE;
}
+ if (blk_queue_debug(q))
+ trace_printk("make rq %d\n", rq->internal_tag);
rq_qos_track(q, rq, bio);
cookie = request_to_qc_t(data.hctx, rq);
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 648eb6743ed5..90745829cf9a 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -257,6 +257,9 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx
*hctx,
if (mmc_card_removed(mq->card)) {
req->rq_flags |= RQF_QUIET;
+
+ if (blk_queue_debug(q))
+ trace_printk("queue rq %d, %s\n", req->internal_tag,
"io err");
return BLK_STS_IOERR;
}
@@ -266,6 +269,9 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx
*hctx,
if (mq->recovery_needed) {
spin_unlock_irq(q->queue_lock);
+ if (blk_queue_debug(q))
+ trace_printk("queue rq %d, %s\n", req->internal_tag,
+ "resource");
return BLK_STS_RESOURCE;
}
@@ -274,6 +280,9 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx
*hctx,
if (mmc_cqe_dcmd_busy(mq)) {
mq->cqe_busy |= MMC_CQE_DCMD_BUSY;
spin_unlock_irq(q->queue_lock);
+ if (blk_queue_debug(q))
+ trace_printk("queue rq %d, %s\n",
req->internal_tag,
+ "resource");
return BLK_STS_RESOURCE;
}
break;
@@ -326,6 +335,8 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx
*hctx,
break;
}
+ if (blk_queue_debug(q))
+ trace_printk("queue rq %d, %d\n", req->internal_tag, (int)ret);
if (issued != MMC_REQ_STARTED) {
bool put_card = false;
@@ -404,6 +415,8 @@ static int mmc_mq_init_queue(struct mmc_queue *mq, int
q_depth,
mq->queue->queue_lock = lock;
mq->queue->queuedata = mq;
+ blk_queue_flag_set(31, mq->queue);
+
return 0;
free_tag_set:
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 1c828e0e9905..b2b471c2f5f6 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2640,6 +2640,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32
intmask)
return;
pr_err("%s: Got command interrupt 0x%08x even though no command
operation was in progress.\n",
mmc_hostname(host->mmc), (unsigned)intmask);
+ ftrace_dump(DUMP_ALL);
sdhci_dumpregs(host);
return;
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 050d599f5ea9..29be3a6d5903 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -741,6 +741,7 @@ bool blk_queue_flag_test_and_clear(unsigned int flag,
struct request_queue *q);
#define blk_queue_preempt_only(q) \
test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
#define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
+#define blk_queue_debug(q) test_bit(31, &(q)->queue_flags)
extern int blk_set_preempt_only(struct request_queue *q);
extern void blk_clear_preempt_only(struct request_queue *q);
Thanks,
Ming