The busy-wait loop used when handling a rw request may spin forever
if the card is bad.
    I've seen mmcqd get blocked forever after a single error message:

mmcblk0: error -110 sending read/write command, response 0x900, card
status 0x80e00

    There was also a case where the card reported a status without error:

mmcblk0: error -110 sending read/write command, response 0x900, card
status 0xe00

    After this error, the card can stay in the prg state, never come back,
    and may not report any further error. So break-out conditions
    should be added to the mmc block layer:
* should not enter the waiting loop in case of error
* should break out of the waiting loop if the card responds with an error
* should break out of the waiting loop on timeout

    These alone will not recover the card, so one more thing is needed:
* re-init the card in case of too many errors

Signed-off-by: Ethan Du <[email protected]>
---

Changes since v1:
       * Use mmc_reinit_host instead of mmc_detect_change

 drivers/mmc/card/block.c |   36 ++++++++++++++++++++++++++++--------
 drivers/mmc/core/core.c  |   38 ++++++++++++++++++++++++++++++++++++++
 include/linux/mmc/card.h |    3 +++
 include/linux/mmc/host.h |    1 +
 include/linux/mmc/mmc.h  |    1 +
 5 files changed, 71 insertions(+), 8 deletions(-)

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index d545f79..cc28a20 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -316,12 +316,14 @@ out:
        return err ? 0 : 1;
 }

+#define BUSY_TIMEOUT_MS (16 * 1024)
 static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 {
        struct mmc_blk_data *md = mq->data;
        struct mmc_card *card = md->queue.card;
        struct mmc_blk_request brq;
        int ret = 1, disable_multi = 0;
+       unsigned long timeout = 0;

        mmc_claim_host(card->host);

@@ -453,7 +455,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue
*mq, struct request *req)
                               brq.stop.resp[0], status);
                }

-               if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
+               if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ &&
+                   !brq.cmd.error && !brq.data.error && !brq.stop.error) {
+                       timeout = jiffies + msecs_to_jiffies(BUSY_TIMEOUT_MS);
                        do {
                                int err;

@@ -466,13 +470,22 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue
*mq, struct request *req)
                                               req->rq_disk->disk_name, err);
                                        goto cmd_err;
                                }
+                               if (cmd.resp[0] & R1_ERROR_MASK) {
+                                       printk(KERN_ERR "%s: card err %#x\n",
+                                               req->rq_disk->disk_name,
+                                               cmd.resp[0]);
+                                       goto cmd_err;
+                               }
                                /*
                                 * Some cards mishandle the status bits,
                                 * so make sure to check both the busy
                                 * indication and the card state.
                                 */
-                       } while (!(cmd.resp[0] & R1_READY_FOR_DATA) ||
-                               (R1_CURRENT_STATE(cmd.resp[0]) == 7));
+                               if ((cmd.resp[0] & R1_READY_FOR_DATA) &&
+                                   (R1_CURRENT_STATE(cmd.resp[0]) != 7))
+                                       break;
+                       } while (time_before(jiffies, timeout));
+                       /* Ignore timeout out */

 #if 0
                        if (cmd.resp[0] & ~0x00000900)
@@ -510,11 +523,11 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue
*mq, struct request *req)

        return 1;

- cmd_err:
-       /*
-        * If this is an SD card and we're writing, we can first
-        * mark the known good sectors as ok.
-        *
+cmd_err:
+       /*
+        * If this is an SD card and we're writing, we can first
+        * mark the known good sectors as ok.
+        *
         * If the card is not SD, we can still ok written sectors
         * as reported by the controller (which might be less than
         * the real number of written sectors, but never more).
@@ -534,6 +547,13 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue
*mq, struct request *req)
                spin_unlock_irq(&md->lock);
        }

+       card->err_count++;
+       if (card->err_count >= ERR_TRIGGER_REINIT) {
+               card->err_count = 0;
+               printk(KERN_WARNING "%s: re-init the card due to error\n",
+                       md->disk->disk_name);
+               mmc_reinit_host(card->host);
+       }
        mmc_release_host(card->host);

        spin_lock_irq(&md->lock);
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 5db49b1..b61de33 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1671,6 +1671,44 @@ int mmc_resume_host(struct mmc_host *host)
 }
 EXPORT_SYMBOL(mmc_resume_host);

+/**
+ *     mmc_reinit_host - reinit a host
+ *     @host: mmc host
+ *
+ *     Calls the bus resume handler and mmc_detect_change(); returns 0.
+ */
+int mmc_reinit_host(struct mmc_host *host)
+{
+       int err;
+
+       mmc_bus_get(host);
+
+       /* Check bus_ops before use, and drop the bus ref on early exit. */
+       if (host->bus_ops && !host->bus_ops->resume) {
+               mmc_bus_put(host);
+               return 0;
+       }
+
+       if (host->bus_ops && !host->bus_dead) {
+               if (!(host->pm_flags & MMC_PM_KEEP_POWER)) {
+                       mmc_power_up(host);
+                       mmc_select_voltage(host, host->ocr);
+               }
+               err = host->bus_ops->resume(host);
+               if (err) {
+                       printk(KERN_WARNING "%s: error %d during resume "
+                                           "(card was removed?)\n",
+                                           mmc_hostname(host), err);
+               }
+       }
+       mmc_bus_put(host);
+
+       /* Slight delay so that resume can progress in parallel. */
+       mmc_detect_change(host, 1);
+       return 0;
+}
+EXPORT_SYMBOL(mmc_reinit_host);
+
 /* Do the card removal on suspend if card is assumed removeable
  * Do that in pm notifier while userspace isn't yet frozen, so we will be able
    to sync the card.
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 6b75250..178de17 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -143,6 +143,9 @@ struct mmc_card {
        const char              **info;         /* info strings */
        struct sdio_func_tuple  *tuples;        /* unknown common tuples */

+       unsigned int            err_count;
+#define ERR_TRIGGER_REINIT 1024
+
        struct dentry           *debugfs_root;
 };

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 1575b52..cff9c69 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -235,6 +235,7 @@ static inline void *mmc_priv(struct mmc_host *host)

 extern int mmc_suspend_host(struct mmc_host *);
 extern int mmc_resume_host(struct mmc_host *);
+extern int mmc_reinit_host(struct mmc_host *);

 extern void mmc_power_save_host(struct mmc_host *host);
 extern void mmc_power_restore_host(struct mmc_host *host);
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index dd11ae5..3b979a1 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -122,6 +122,7 @@
 #define R1_UNDERRUN            (1 << 18)       /* ex, c */
 #define R1_OVERRUN             (1 << 17)       /* ex, c */
 #define R1_CID_CSD_OVERWRITE   (1 << 16)       /* erx, c, CID/CSD overwrite */
+#define R1_ERROR_MASK          0xffff0000
 #define R1_WP_ERASE_SKIP       (1 << 15)       /* sx, c */
 #define R1_CARD_ECC_DISABLED   (1 << 14)       /* sx, a */
 #define R1_ERASE_RESET         (1 << 13)       /* sr, c */
-- 
1.5.6.5
--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to