A crash dump from mccp20.qa.sw.ru looks as though scsi_eh_wakeup() failed to wakeup scsi_error_handler thread. The patch logs last few state transitons in Scsi_Host to make further investigations easier (if the bug bites us again). The patch touches only error-handling paths, so no performance impact expected.
See scsi-recover.txt (attached to JIRA) for an example of successful recovery. https://jira.sw.ru/browse/PSBM-44228 Signed-off-by: Maxim Patlasov <[email protected]> --- drivers/ata/libata-eh.c | 5 + drivers/scsi/hosts.c | 13 ++++ drivers/scsi/libsas/sas_scsi_host.c | 4 + drivers/scsi/scsi.c | 2 + drivers/scsi/scsi_dbg.h | 126 +++++++++++++++++++++++++++++++++++ drivers/scsi/scsi_error.c | 37 ++++++++-- drivers/scsi/scsi_lib.c | 11 +++ 7 files changed, 188 insertions(+), 10 deletions(-) create mode 100644 drivers/scsi/scsi_dbg.h diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index e326202..1fcae67 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -43,6 +43,7 @@ #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> #include "../scsi/scsi_transport_api.h" +#include "../scsi/scsi_dbg.h" #include <linux/libata.h> @@ -685,6 +686,7 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, * Successfully complete it. */ scmd->retries = scmd->allowed; + scsi_debug_log_cmnd(ATA_SCSI_CMD_ERROR_HANDLER_CALLS_EH_FINISH, scmd); scsi_eh_finish_cmd(scmd, &ap->eh_done_q); } } @@ -1001,6 +1003,7 @@ void ata_std_sched_eh(struct ata_port *ap) return; ata_eh_set_pending(ap, 1); + scsi_debug_log_shost(ATA_STD_SCHED_EH_CALLS_SCHEDULE_EH, ap->scsi_host); scsi_schedule_eh(ap->scsi_host); DPRINTK("port EH scheduled\n"); @@ -1023,6 +1026,7 @@ void ata_std_end_eh(struct ata_port *ap) { struct Scsi_Host *host = ap->scsi_host; + scsi_debug_log_shost(ATA_STD_END_EH_ZERO_EH_SCHEDULED, host); host->host_eh_scheduled = 0; } EXPORT_SYMBOL(ata_std_end_eh); @@ -1298,6 +1302,7 @@ static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) WARN_ON(ata_tag_valid(qc->tag)); spin_unlock_irqrestore(ap->lock, flags); + scsi_debug_log_cmnd(ATA_EH_QC_COMPLETE_CALLS_EH_FINISH, scmd); scsi_eh_finish_cmd(scmd, &ap->eh_done_q); } diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 99d22ea..287045b 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -40,6 +40,7 @@ #include "scsi_priv.h" #include "scsi_logging.h" +#include "scsi_dbg.h" static atomic_t scsi_host_next_hn = ATOMIC_INIT(0); /* host_no for next new host */ @@ -339,6 +340,7 @@ static void scsi_host_dev_release(struct device *dev) if (parent) put_device(parent); + kfree(SHOST_TO_SDBG(shost)); kfree(shost); } @@ -369,6 +371,7 @@ static struct device_type scsi_host_type = { struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) { struct Scsi_Host *shost; + struct scsi_host_dbg *sdbg; gfp_t gfp_mask = GFP_KERNEL; if (sht->unchecked_isa_dma && privsize) @@ -378,6 +381,15 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) if (!shost) return NULL; + sdbg = kzalloc(sizeof(struct scsi_host_dbg), gfp_mask); + if (!sdbg) { + kfree(shost); + return NULL; + } + + SHOST_TO_SDBG(shost) = sdbg; + spin_lock_init(&sdbg->sdbg_lock); + shost->host_lock = &shost->default_lock; spin_lock_init(shost->host_lock); shost->shost_state = SHOST_CREATED; @@ -495,6 +507,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) fail_kthread: kthread_stop(shost->ehandler); fail_kfree: + kfree(SHOST_TO_SDBG(shost)); kfree(shost); return NULL; } diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 661cfd3..9792441 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -41,6 +41,7 @@ #include "../scsi_sas_internal.h" #include "../scsi_transport_api.h" #include "../scsi_priv.h" +#include "../scsi_dbg.h" #include <linux/err.h> #include <linux/blkdev.h> @@ -262,6 +263,7 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) * handler done list, this also takes it off the * error handler pending list. */ + scsi_debug_log_cmnd(SAS_EH_FINISH_CMD_CALLS_EH_FINISH, cmd); scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q); } @@ -513,6 +515,7 @@ static int sas_queue_reset(struct domain_device *dev, int reset_type, int lun, i set_bit(SAS_DEV_EH_PENDING, &dev->state); set_bit(reset_type, &dev->state); int_to_scsilun(lun, &dev->ssp_dev.reset_lun); + scsi_debug_log_shost(SAS_QUEUE_RESET_CALLS_SCHEDULE_EH, ha->core.shost); scsi_schedule_eh(ha->core.shost); } spin_unlock_irq(&ha->lock); @@ -848,6 +851,7 @@ out: /* check if any new eh work was scheduled during the last run */ spin_lock_irq(&ha->lock); if (ha->eh_active == 0) { + scsi_debug_log_shost(SAS_SCSI_RECOVER_HOST_ZERO_EH_SCHEDULED, shost); shost->host_eh_scheduled = 0; retry = false; } diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 9423e6b..bdde152 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -68,6 +68,7 @@ #include "scsi_priv.h" #include "scsi_logging.h" +#include "scsi_dbg.h" #define CREATE_TRACE_POINTS #include <trace/events/scsi.h> @@ -684,6 +685,7 @@ void scsi_finish_command(struct scsi_cmnd *cmd) struct scsi_driver *drv; unsigned int good_bytes; + scsi_debug_log_cmnd(SCSI_FINISH_COMMAND_CALLS_UNBUSY, cmd); scsi_device_unbusy(sdev); /* diff --git a/drivers/scsi/scsi_dbg.h b/drivers/scsi/scsi_dbg.h new file mode 100644 index 0000000..e3c5603 --- /dev/null +++ b/drivers/scsi/scsi_dbg.h @@ -0,0 +1,126 @@ +#ifndef _SCSI_DBG_H +#define _SCSI_DBG_H + +#include <scsi/scsi_cmnd.h> + +/* + * Temporary debug stuff to chase missed ehandler wakeup. + */ + +#define SCSI_HOST_DBG_N_ENTRIES 45 /* fit in one page */ + +enum scsi_dbg_type { + SCSI_HOST_QUEUE_READY_INC_HOST_BUSY = 1, /* scsi_host_queue_ready() */ + SCSI_HOST_QUEUE_READY_DEC_HOST_BUSY, + SCSI_KILL_REQUEST_INC_HOST_BUSY, + SCSI_QUEUE_RQ_DEC_HOST_BUSY, + SCSI_FINISH_COMMAND_CALLS_UNBUSY, + SCSI_QUEUE_INSERT_CALLS_UNBUSY, + SCSI_EH_SCMD_ADD_INC_HOST_FAILED, + ATA_SCSI_CMD_ERROR_HANDLER_CALLS_EH_FINISH, + ATA_EH_QC_COMPLETE_CALLS_EH_FINISH, + SAS_EH_FINISH_CMD_CALLS_EH_FINISH, + SCSI_EH_GET_SENSE_CALLS_EH_FINISH, + SCSI_EH_TEST_DEVICES_CALLS_EH_FINISH, + SCSI_EH_ABORT_CMDS_CALLS_EH_FINISH, + SCSI_EH_STU_CALLS_EH_FINISH, + SCSI_EH_BUS_DEVICE_RESET_CALLS_EH_FINISH, + SCSI_EH_TARGET_RESET_CALLS_EH_FINISH, + SCSI_EH_BUS_RESET_CALLS_EH_FINISH, + SCSI_EH_HOST_RESET_CALLS_EH_FINISH, + SCSI_EH_OFFLINE_SDEVS_CALLS_EH_FINISH, + ATA_STD_END_EH_ZERO_EH_SCHEDULED, + SAS_SCSI_RECOVER_HOST_ZERO_EH_SCHEDULED, + ATA_STD_SCHED_EH_CALLS_SCHEDULE_EH, + SAS_QUEUE_RESET_CALLS_SCHEDULE_EH, + SCSI_EH_WAKEUP_EHANDLER, + SCSI_SCHEDULE_EH_CALLS_EH_WAKEUP, + SCSI_DEVICE_UNBUSY_CALLS_EH_WAKEUP, + SCSI_ERROR_HANDLER_SLEEP, + SCSI_ERROR_HANDLER_WAKEUP, + SCSI_ERROR_HANDLER_CALLS_HANDLER, +}; + +struct scsi_host_log_entry { + enum scsi_dbg_type sle_type; + enum scsi_host_state sle_shost_state; + + int sle_host_failed; + int sle_host_busy; + int sle_host_blocked; + int sle_host_eh_scheduled; + + struct task_struct *sle_task; + char sle_comm[TASK_COMM_LEN]; + + struct scsi_device *sle_sdev; + struct scsi_cmnd *sle_cmnd; + struct request *sle_req; + + ktime_t sle_ktime; + u64 sle_jiffies; +}; + +struct scsi_host_dbg { + spinlock_t sdbg_lock; + struct scsi_host_log_entry sdbg_entries[SCSI_HOST_DBG_N_ENTRIES]; + int sdbg_next_entry; +}; + +#define SHOST_TO_SDBG(shost) (shost)->scsi_mq_reserved3 + +static inline void +scsi_debug_log(struct Scsi_Host *shost, enum scsi_dbg_type type, + struct scsi_device *sdev, struct scsi_cmnd *cmnd, + struct request *req) +{ + struct scsi_host_dbg *s = SHOST_TO_SDBG(shost); + struct scsi_host_log_entry *e; + unsigned long irq_flags; + + spin_lock_irqsave(&s->sdbg_lock, irq_flags); + e = &s->sdbg_entries[s->sdbg_next_entry]; + + e->sle_type = type; + e->sle_sdev = sdev; + e->sle_cmnd = cmnd; + e->sle_req = req; + + e->sle_shost_state = shost->shost_state; + e->sle_host_failed = shost->host_failed; + e->sle_host_busy = atomic_read(&shost->host_busy); + e->sle_host_blocked = atomic_read(&shost->host_blocked); + e->sle_host_eh_scheduled = shost->host_eh_scheduled; + + e->sle_task = current; + memcpy(e->sle_comm, current->comm, TASK_COMM_LEN); + + e->sle_ktime = ktime_get(); + e->sle_jiffies = jiffies; + + s->sdbg_next_entry++; + if (s->sdbg_next_entry == SCSI_HOST_DBG_N_ENTRIES) + s->sdbg_next_entry = 0; + spin_unlock_irqrestore(&s->sdbg_lock, irq_flags); +} + +static inline void +scsi_debug_log_cmnd(enum scsi_dbg_type type, struct scsi_cmnd *cmnd) +{ + scsi_debug_log(cmnd->device->host, type, cmnd->device, cmnd, + cmnd->request); +} + +static inline void +scsi_debug_log_shost(enum scsi_dbg_type type, struct Scsi_Host *shost) +{ + scsi_debug_log(shost, type, NULL, NULL, NULL); +} + +static inline void +scsi_debug_log_sdev(enum scsi_dbg_type type, struct scsi_device *sdev) +{ + scsi_debug_log(sdev->host, type, sdev, NULL, NULL); +} + +#endif /* _SCSI_DBG_H */ diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 5cd79de..ff85e8d 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -38,6 +38,7 @@ #include <scsi/scsi_ioctl.h> #include "scsi_priv.h" +#include "scsi_dbg.h" #include "scsi_logging.h" #include "scsi_transport_api.h" @@ -61,6 +62,7 @@ void scsi_eh_wakeup(struct Scsi_Host *shost) { if (atomic_read(&shost->host_busy) == shost->host_failed) { trace_scsi_eh_wakeup(shost); + scsi_debug_log_shost(SCSI_EH_WAKEUP_EHANDLER, shost); wake_up_process(shost->ehandler); SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost, "Waking error handler thread\n")); @@ -82,6 +84,7 @@ void scsi_schedule_eh(struct Scsi_Host *shost) if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 || scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) { shost->host_eh_scheduled++; + scsi_debug_log_shost(SCSI_SCHEDULE_EH_CALLS_EH_WAKEUP, shost); scsi_eh_wakeup(shost); } @@ -247,6 +250,7 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) eh_flag &= ~SCSI_EH_CANCEL_CMD; scmd->eh_eflags |= eh_flag; list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); + scsi_debug_log_cmnd(SCSI_EH_SCMD_ADD_INC_HOST_FAILED, scmd); shost->host_failed++; scsi_eh_wakeup(shost); out_unlock: @@ -1206,6 +1210,7 @@ int scsi_eh_get_sense(struct list_head *work_q, else if (rtn != NEEDS_RETRY) continue; + scsi_debug_log_cmnd(SCSI_EH_GET_SENSE_CALLS_EH_FINISH, scmd); scsi_eh_finish_cmd(scmd, done_q); } @@ -1290,8 +1295,10 @@ static int scsi_eh_test_devices(struct list_head *cmd_list, if (scmd->device == sdev) { if (finish_cmds && (try_stu || - scsi_eh_action(scmd, SUCCESS) == SUCCESS)) + scsi_eh_action(scmd, SUCCESS) == SUCCESS)) { + scsi_debug_log_cmnd(SCSI_EH_TEST_DEVICES_CALLS_EH_FINISH, scmd); scsi_eh_finish_cmd(scmd, done_q); + } else list_move_tail(&scmd->eh_entry, work_q); } @@ -1345,9 +1352,10 @@ static int scsi_eh_abort_cmds(struct list_head *work_q, return list_empty(work_q); } scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD; - if (rtn == FAST_IO_FAIL) + if (rtn == FAST_IO_FAIL) { + scsi_debug_log_cmnd(SCSI_EH_ABORT_CMDS_CALLS_EH_FINISH, scmd); scsi_eh_finish_cmd(scmd, done_q); - else + } else list_move_tail(&scmd->eh_entry, &check_list); } @@ -1425,8 +1433,10 @@ static int scsi_eh_stu(struct Scsi_Host *shost, list_for_each_entry_safe(scmd, next, work_q, eh_entry) { if (scmd->device == sdev && - scsi_eh_action(scmd, SUCCESS) == SUCCESS) + scsi_eh_action(scmd, SUCCESS) == SUCCESS) { + scsi_debug_log_cmnd(SCSI_EH_STU_CALLS_EH_FINISH, scmd); scsi_eh_finish_cmd(scmd, done_q); + } } } } else { @@ -1490,9 +1500,11 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, list_for_each_entry_safe(scmd, next, work_q, eh_entry) { if (scmd->device == sdev && - scsi_eh_action(scmd, rtn) != FAILED) + scsi_eh_action(scmd, rtn) != FAILED) { + scsi_debug_log_cmnd(SCSI_EH_BUS_DEVICE_RESET_CALLS_EH_FINISH, scmd); scsi_eh_finish_cmd(scmd, done_q); + } } } } else { @@ -1559,9 +1571,10 @@ static int scsi_eh_target_reset(struct Scsi_Host *shost, if (rtn == SUCCESS) list_move_tail(&scmd->eh_entry, &check_list); - else if (rtn == FAST_IO_FAIL) + else if (rtn == FAST_IO_FAIL) { + scsi_debug_log_cmnd(SCSI_EH_TARGET_RESET_CALLS_EH_FINISH, scmd); scsi_eh_finish_cmd(scmd, done_q); - else + } else /* push back on work queue for further processing */ list_move(&scmd->eh_entry, work_q); } @@ -1624,10 +1637,11 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost, if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { if (channel == scmd_channel(scmd)) { - if (rtn == FAST_IO_FAIL) + if (rtn == FAST_IO_FAIL) { + scsi_debug_log_cmnd(SCSI_EH_BUS_RESET_CALLS_EH_FINISH, scmd); scsi_eh_finish_cmd(scmd, done_q); - else + } else list_move_tail(&scmd->eh_entry, &check_list); } @@ -1669,6 +1683,7 @@ static int scsi_eh_host_reset(struct Scsi_Host *shost, list_splice_init(work_q, &check_list); } else if (rtn == FAST_IO_FAIL) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { + scsi_debug_log_cmnd(SCSI_EH_HOST_RESET_CALLS_EH_FINISH, scmd); scsi_eh_finish_cmd(scmd, done_q); } } else { @@ -1700,6 +1715,7 @@ static void scsi_eh_offline_sdevs(struct list_head *work_q, * FIXME: Handle lost cmds. */ } + scsi_debug_log_cmnd(SCSI_EH_OFFLINE_SDEVS_CALLS_EH_FINISH, scmd); scsi_eh_finish_cmd(scmd, done_q); } return; @@ -2177,7 +2193,9 @@ int scsi_error_handler(void *data) shost_printk(KERN_INFO, shost, "scsi_eh_%d: sleeping\n", shost->host_no)); + scsi_debug_log_shost(SCSI_ERROR_HANDLER_SLEEP, shost); schedule(); + scsi_debug_log_shost(SCSI_ERROR_HANDLER_WAKEUP, shost); continue; } @@ -2202,6 +2220,7 @@ int scsi_error_handler(void *data) continue; } + scsi_debug_log_shost(SCSI_ERROR_HANDLER_CALLS_HANDLER, shost); if (shost->transportt->eh_strategy_handler) shost->transportt->eh_strategy_handler(shost); else diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d416acf..67b2540 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -38,6 +38,7 @@ #include "scsi_priv.h" #include "scsi_logging.h" +#include "scsi_dbg.h" #define SG_MEMPOOL_NR ARRAY_SIZE(scsi_sg_pools) @@ -155,8 +156,10 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy) * Decrement the counters, since these commands are no longer * active on the host/device. */ - if (unbusy) + if (unbusy) { + scsi_debug_log_cmnd(SCSI_QUEUE_INSERT_CALLS_UNBUSY, cmd); scsi_device_unbusy(device); + } /* * Requeue this command. It will go before all other commands @@ -320,6 +323,7 @@ void scsi_device_unbusy(struct scsi_device *sdev) if (unlikely(scsi_host_in_recovery(shost) && (shost->host_failed || shost->host_eh_scheduled))) { spin_lock_irqsave(shost->host_lock, flags); + scsi_debug_log_shost(SCSI_DEVICE_UNBUSY_CALLS_EH_WAKEUP, shost); scsi_eh_wakeup(shost); spin_unlock_irqrestore(shost->host_lock, flags); } @@ -1509,6 +1513,8 @@ static inline int scsi_host_queue_ready(struct request_queue *q, if (scsi_host_in_recovery(shost)) return 0; + scsi_debug_log_sdev(SCSI_HOST_QUEUE_READY_INC_HOST_BUSY, sdev); + busy = atomic_inc_return(&shost->host_busy) - 1; if (atomic_read(&shost->host_blocked) > 0) { if (busy) @@ -1546,6 +1552,7 @@ starved: list_add_tail(&sdev->starved_entry, &shost->starved_list); spin_unlock_irq(shost->host_lock); out_dec: + scsi_debug_log_sdev(SCSI_HOST_QUEUE_READY_DEC_HOST_BUSY, sdev); atomic_dec(&shost->host_busy); return 0; } @@ -1605,6 +1612,7 @@ static void scsi_kill_request(struct request *req, struct request_queue *q) cmd->result = DID_NO_CONNECT << 16; atomic_inc(&cmd->device->iorequest_cnt); + scsi_debug_log_cmnd(SCSI_KILL_REQUEST_INC_HOST_BUSY, cmd); /* * SCSI request completion path will do scsi_device_unbusy(), * bump busy counts. To bump the counters, we need to dance @@ -1936,6 +1944,7 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_MQ_RQ_QUEUE_OK; out_dec_host_busy: + scsi_debug_log_sdev(SCSI_QUEUE_RQ_DEC_HOST_BUSY, sdev); atomic_dec(&shost->host_busy); out_dec_target_busy: if (scsi_target(sdev)->can_queue > 0) _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
