Module Name: src Committed By: jdolecek Date: Wed Jul 19 19:39:28 UTC 2017
Modified Files: src/sys/dev/ata [jdolecek-ncq]: ata.c atareg.h atavar.h satafis_subr.c satafisvar.h wd.c wdvar.h Log Message: tighten and expand error handling, mostly for NCQ use cases: - make retry timeout callout per xfer, i.e. retry separately - zero whole bio struct on retry to avoid more stale state - add a REQUEUE option, which doesn't bump retry count - add ata_read_log_ext_ncq() for NCQ recovery - adjust logic for activating xfers - allow next command only when it's for same drive, several concurrent are only supported when HBA and driver support FIS-based switching - add new ata_timeout() which handles race between callout_stop() and the invocation, add appropriate handling on deactivate/free paths - stop using ch_status/ch_error in non-wdc code; later it will be dropped completely To generate a diff of this commit: cvs rdiff -u -r1.132.8.18 -r1.132.8.19 src/sys/dev/ata/ata.c cvs rdiff -u -r1.43.18.2 -r1.43.18.3 src/sys/dev/ata/atareg.h cvs rdiff -u -r1.92.8.16 -r1.92.8.17 src/sys/dev/ata/atavar.h cvs rdiff -u -r1.7.28.2 -r1.7.28.3 src/sys/dev/ata/satafis_subr.c cvs rdiff -u -r1.3 -r1.3.50.1 src/sys/dev/ata/satafisvar.h cvs rdiff -u -r1.428.2.25 -r1.428.2.26 src/sys/dev/ata/wd.c cvs rdiff -u -r1.43.4.6 -r1.43.4.7 src/sys/dev/ata/wdvar.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/ata/ata.c diff -u src/sys/dev/ata/ata.c:1.132.8.18 src/sys/dev/ata/ata.c:1.132.8.19 --- src/sys/dev/ata/ata.c:1.132.8.18 Tue Jun 27 18:36:03 2017 +++ src/sys/dev/ata/ata.c Wed Jul 19 19:39:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: ata.c,v 1.132.8.18 2017/06/27 18:36:03 jdolecek Exp $ */ +/* $NetBSD: ata.c,v 1.132.8.19 2017/07/19 19:39:28 jdolecek Exp $ */ /* * Copyright (c) 1998, 2001 Manuel Bouyer. All rights reserved. @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ata.c,v 1.132.8.18 2017/06/27 18:36:03 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ata.c,v 1.132.8.19 2017/07/19 19:39:28 jdolecek Exp $"); #include "opt_ata.h" @@ -241,14 +241,34 @@ ata_queue_get_active_xfer(struct ata_cha return xfer; } +struct ata_xfer * +ata_queue_drive_active_xfer(struct ata_channel *chp, int drive) +{ + struct ata_xfer *xfer = NULL; + + mutex_enter(&chp->ch_lock); + + TAILQ_FOREACH(xfer, &chp->ch_queue->active_xfers, c_activechain) { + if (xfer->c_drive == drive) + break; + } + KASSERT(xfer != NULL); + + mutex_exit(&chp->ch_lock); + + return xfer; +} + static void -ata_xfer_init(struct ata_xfer *xfer, bool zero) +ata_xfer_init(struct ata_xfer *xfer, uint8_t slot) { - if (zero) - memset(xfer, 0, sizeof(*xfer)); + memset(xfer, 0, sizeof(*xfer)); + + xfer->c_slot = slot; cv_init(&xfer->c_active, "ataact"); callout_init(&xfer->c_timo_callout, 0); /* XXX MPSAFE */ + callout_init(&xfer->c_retry_callout, 0); /* XXX MPSAFE */ } static void @@ -256,6 +276,8 @@ ata_xfer_destroy(struct ata_xfer *xfer) { callout_halt(&xfer->c_timo_callout, NULL); /* XXX MPSAFE */ callout_destroy(&xfer->c_timo_callout); + callout_halt(&xfer->c_retry_callout, NULL); /* XXX MPSAFE */ + callout_destroy(&xfer->c_retry_callout); cv_destroy(&xfer->c_active); } @@ -278,7 +300,7 @@ ata_queue_alloc(uint8_t openings) cv_init(&chq->queue_drain, "atdrn"); for (uint8_t i = 0; i < openings; i++) - ata_xfer_init(&chq->queue_xfers[i], false); + 
ata_xfer_init(&chq->queue_xfers[i], i); return chq; } @@ -1009,6 +1031,88 @@ out: return rv; } +int +ata_read_log_ext_ncq(struct ata_drive_datas *drvp, uint8_t flags, + uint8_t *slot, uint8_t *status, uint8_t *err) +{ + struct ata_xfer *xfer; + int rv; + struct ata_channel *chp = drvp->chnl_softc; + struct atac_softc *atac = chp->ch_atac; + uint8_t *tb; + + ATADEBUG_PRINT(("%s\n", __func__), DEBUG_FUNCS); + + /* Only NCQ ATA drives support/need this */ + if (drvp->drive_type != ATA_DRIVET_ATA || + (drvp->drive_flags & ATA_DRIVE_NCQ) == 0) + return EOPNOTSUPP; + + xfer = ata_get_xfer_ext(chp, false, 0); + if (xfer == NULL) { + ATADEBUG_PRINT(("%s: no xfer\n", __func__), + DEBUG_FUNCS|DEBUG_XFERS); + return EAGAIN; + } + + tb = malloc(DEV_BSIZE, M_DEVBUF, M_NOWAIT); + if (tb == NULL) { + ATADEBUG_PRINT(("%s: memory allocation failed\n", __func__), + DEBUG_FUNCS|DEBUG_XFERS); + rv = EAGAIN; + goto out; + } + memset(tb, 0, DEV_BSIZE); + + /* + * We could use READ LOG DMA EXT if drive supports it (i.e. + * when it supports Streaming feature) to avoid PIO command, + * and to make this a little faster. Realistically, it + * should not matter. 
+ */ + xfer->c_flags |= C_IMMEDIATE; + xfer->c_ata_c.r_command = WDCC_READ_LOG_EXT; + xfer->c_ata_c.r_lba = WDCC_LOG_PAGE_NCQ; + xfer->c_ata_c.r_st_bmask = WDCS_DRDY; + xfer->c_ata_c.r_st_pmask = WDCS_DRDY; + xfer->c_ata_c.r_count = 1; + xfer->c_ata_c.r_device = WDSD_LBA; + xfer->c_ata_c.flags = AT_READ | AT_LBA | flags; + xfer->c_ata_c.timeout = 1000; /* 1s */ + xfer->c_ata_c.data = tb; + xfer->c_ata_c.bcount = DEV_BSIZE; + + if ((*atac->atac_bustype_ata->ata_exec_command)(drvp, + xfer) != ATACMD_COMPLETE) { + rv = EAGAIN; + goto out2; + } + if (xfer->c_ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) { + rv = EINVAL; + goto out2; + } + + /* XXX verify checksum and refuse if not correct (QEMU) */ + + if (tb[0] & WDCC_LOG_NQ) { + /* not a NCQ command */ + rv = EOPNOTSUPP; + goto out2; + } + + *slot = tb[0] & 0x1f; + *status = tb[2]; + *err = tb[3]; + + rv = 0; + +out2: + free(tb, DEV_BSIZE); +out: + ata_free_xfer(chp, xfer); + return rv; +} + #if NATA_DMA void ata_dmaerr(struct ata_drive_datas *drvp, int flags) @@ -1067,8 +1171,13 @@ ata_exec_xfer(struct ata_channel *chp, s mutex_enter(&chp->ch_lock); - /* insert at the end of command list */ - TAILQ_INSERT_TAIL(&chp->ch_queue->queue_xfer, xfer, c_xferchain); + /* insert at the end of command list unless specially requested */ + if (xfer->c_flags & C_IMMEDIATE) + TAILQ_INSERT_HEAD(&chp->ch_queue->queue_xfer, xfer, + c_xferchain); + else + TAILQ_INSERT_TAIL(&chp->ch_queue->queue_xfer, xfer, + c_xferchain); ATADEBUG_PRINT(("atastart from ata_exec_xfer, flags 0x%x\n", chp->ch_flags), DEBUG_XFERS); @@ -1086,7 +1195,7 @@ ata_exec_xfer(struct ata_channel *chp, s * Free xfer now if it there was attempt to free it * while we were waiting. 
*/ - if (xfer->c_flags & C_FREE) { + if ((xfer->c_flags & (C_FREE|C_WAITTIMO)) == C_FREE) { ata_free_xfer(chp, xfer); return; } @@ -1111,6 +1220,7 @@ atastart(struct ata_channel *chp) struct atac_softc *atac = chp->ch_atac; struct ata_queue *chq = chp->ch_queue; struct ata_xfer *xfer, *axfer; + bool immediate; #ifdef ATA_DEBUG int spl1, spl2; @@ -1127,12 +1237,19 @@ atastart(struct ata_channel *chp) mutex_enter(&chp->ch_lock); + KASSERT(chq->queue_active <= chq->queue_openings); if (chq->queue_active == chq->queue_openings) { goto out; /* channel completely busy */ } + /* is there a xfer ? */ + if ((xfer = TAILQ_FIRST(&chp->ch_queue->queue_xfer)) == NULL) + goto out; + + immediate = ISSET(xfer->c_flags, C_IMMEDIATE); + /* is the queue frozen? */ - if (__predict_false(chq->queue_freeze > 0)) { + if (__predict_false(!immediate && chq->queue_freeze > 0)) { if (chq->queue_flags & QF_IDLE_WAIT) { chq->queue_flags &= ~QF_IDLE_WAIT; wakeup(&chq->queue_flags); @@ -1140,21 +1257,23 @@ atastart(struct ata_channel *chp) goto out; /* queue frozen */ } - /* is there a xfer ? */ - if ((xfer = TAILQ_FIRST(&chp->ch_queue->queue_xfer)) == NULL) - goto out; - /* all xfers on same queue must belong to the same channel */ KASSERT(xfer->c_chp == chp); /* - * Can only take NCQ command if there are no current active - * commands, or if the active commands are NCQ. Need only check - * first xfer. - */ - axfer = TAILQ_FIRST(&chp->ch_queue->active_xfers); - if (axfer && (axfer->c_flags & C_NCQ) == 0) - goto out; + * Can only take the command if there are no current active + * commands, or if the command is NCQ and the active commands are also + * NCQ. If PM is in use and HBA driver doesn't support/use FIS-based + * switching, can only send commands to single drive. + * Need only check first xfer. 
+ * XXX FIS-based switching - revisit + */ + if (!immediate && (axfer = TAILQ_FIRST(&chp->ch_queue->active_xfers))) { + if (!ISSET(xfer->c_flags, C_NCQ) || + !ISSET(axfer->c_flags, C_NCQ) || + xfer->c_drive != axfer->c_drive) + goto out; + } struct ata_drive_datas * const drvp = &chp->ch_drive[xfer->c_drive]; @@ -1170,11 +1289,6 @@ atastart(struct ata_channel *chp) goto out; } -#ifdef DIAGNOSTIC - if ((chp->ch_flags & ATACH_IRQ_WAIT) != 0 - && chp->ch_queue->queue_openings == 1) - panic("atastart: channel waiting for irq"); -#endif ATADEBUG_PRINT(("atastart: xfer %p channel %d drive %d\n", xfer, chp->ch_channel, xfer->c_drive), DEBUG_XFERS); if (drvp->drive_flags & ATA_DRIVE_RESET) { @@ -1242,7 +1356,6 @@ retry: /* zero everything after the callout member */ memset(&xfer->c_startzero, 0, sizeof(struct ata_xfer) - offsetof(struct ata_xfer, c_startzero)); - xfer->c_slot = slot; out: mutex_exit(&chp->ch_lock); @@ -1259,7 +1372,7 @@ ata_free_xfer(struct ata_channel *chp, s mutex_enter(&chp->ch_lock); - if (xfer->c_flags & C_WAITACT) { + if (xfer->c_flags & (C_WAITACT|C_WAITTIMO)) { /* Someone is waiting for this xfer, so we can't free now */ xfer->c_flags |= C_FREE; cv_signal(&xfer->c_active); @@ -1318,6 +1431,9 @@ ata_deactivate_xfer(struct ata_channel * callout_stop(&xfer->c_timo_callout); + if (callout_invoking(&xfer->c_timo_callout)) + xfer->c_flags |= C_WAITTIMO; + TAILQ_REMOVE(&chq->active_xfers, xfer, c_activechain); chq->active_xfers_used &= ~__BIT(xfer->c_slot); chq->queue_active--; @@ -1359,6 +1475,76 @@ ata_waitdrain_xfer_check(struct ata_chan } /* + * Check for race of normal transfer handling vs. timeout. + */ +static bool +ata_timo_xfer_check(struct ata_xfer *xfer) +{ + struct ata_channel *chp = xfer->c_chp; + struct ata_drive_datas *drvp = &chp->ch_drive[xfer->c_drive]; + + mutex_enter(&chp->ch_lock); + + callout_ack(&xfer->c_timo_callout); + + if (xfer->c_flags & C_WAITTIMO) { + xfer->c_flags &= ~C_WAITTIMO; + + /* Handle race vs. 
ata_free_xfer() */ + if (xfer->c_flags & C_FREE) { + xfer->c_flags &= ~C_FREE; + mutex_exit(&chp->ch_lock); + + aprint_normal_dev(drvp->drv_softc, + "xfer %d freed while invoking timeout\n", + xfer->c_slot); + + ata_free_xfer(chp, xfer); + return true; + } + + /* Handle race vs. callout_stop() in ata_deactivate_xfer() */ + if (!callout_expired(&xfer->c_timo_callout)) { + mutex_exit(&chp->ch_lock); + + aprint_normal_dev(drvp->drv_softc, + "xfer %d deactivated while invoking timeout\n", + xfer->c_slot); + return true; + } + } + + mutex_exit(&chp->ch_lock); + + /* No race, proceed with timeout handling */ + return false; +} + +void +ata_timeout(void *v) +{ + struct ata_xfer *xfer = v; + int s; + + ATADEBUG_PRINT(("%s: slot %d\n", __func__, xfer->c_slot), + DEBUG_FUNCS|DEBUG_XFERS); + + s = splbio(); /* XXX MPSAFE */ + + if (ata_timo_xfer_check(xfer)) { + /* Already logged */ + goto out; + } + + /* Mark as timed out. Do not print anything, wd(4) will. */ + xfer->c_flags |= C_TIMEOU; + xfer->c_intr(xfer->c_chp, xfer, 0); + +out: + splx(s); +} + +/* * Kill off all active xfers for a ata_channel. * * Must be called at splbio(). @@ -1497,12 +1683,12 @@ ata_reset_channel(struct ata_channel *ch chp->ch_drive[drive].state = 0; chp->ch_flags &= ~ATACH_TH_RESET; - if ((flags & AT_RST_EMERG) == 0) { - ata_channel_thaw(chp); - atastart(chp); - } else { + if (flags & AT_RST_EMERG) { /* make sure that we can use polled commands */ ata_queue_reset(chp->ch_queue); + } else { + ata_channel_thaw(chp); + atastart(chp); } } Index: src/sys/dev/ata/atareg.h diff -u src/sys/dev/ata/atareg.h:1.43.18.2 src/sys/dev/ata/atareg.h:1.43.18.3 --- src/sys/dev/ata/atareg.h:1.43.18.2 Mon Apr 24 22:20:23 2017 +++ src/sys/dev/ata/atareg.h Wed Jul 19 19:39:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: atareg.h,v 1.43.18.2 2017/04/24 22:20:23 jdolecek Exp $ */ +/* $NetBSD: atareg.h,v 1.43.18.3 2017/07/19 19:39:28 jdolecek Exp $ */ /* * Copyright (c) 1998, 2001 Manuel Bouyer. 
@@ -94,10 +94,17 @@ #define WDCC_RECAL 0x10 /* disk restore code -- resets cntlr */ #define WDCC_READ 0x20 /* disk read code */ + +#define WDCC_READ_LOG_EXT 0x2f +#define WDCC_LOG_PAGE_NCQ 0x10 +#define WDCC_LOG_NQ __BIT(7) + #define WDCC_WRITE 0x30 /* disk write code */ #define WDCC__LONG 0x02 /* modifier -- access ecc bytes */ #define WDCC__NORETRY 0x01 /* modifier -- no retrys */ +#define WDCC_READ_LOG_DMA_EXT 0x47 /* DMA variant of READ_LOG_EXT */ + #define WDCC_FORMAT 0x50 /* disk format code */ #define WDCC_DIAGNOSE 0x90 /* controller diagnostic */ #define WDCC_IDP 0x91 /* initialize drive parameters */ Index: src/sys/dev/ata/atavar.h diff -u src/sys/dev/ata/atavar.h:1.92.8.16 src/sys/dev/ata/atavar.h:1.92.8.17 --- src/sys/dev/ata/atavar.h:1.92.8.16 Tue Jun 27 18:36:03 2017 +++ src/sys/dev/ata/atavar.h Wed Jul 19 19:39:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: atavar.h,v 1.92.8.16 2017/06/27 18:36:03 jdolecek Exp $ */ +/* $NetBSD: atavar.h,v 1.92.8.17 2017/07/19 19:39:28 jdolecek Exp $ */ /* * Copyright (c) 1998, 2001 Manuel Bouyer. @@ -65,8 +65,8 @@ struct ata_bio { #define TIMEOUT 4 /* device timed out */ #define ERR_NODEV 5 /* device has been gone */ #define ERR_RESET 6 /* command was terminated by channel reset */ +#define REQUEUE 7 /* different xfer failed, requeue command */ uint8_t r_error;/* copy of error register */ - int retries;/* number of xfer retry */ struct buf *bp; }; @@ -132,13 +132,15 @@ struct scsipi_xfer; */ struct ata_xfer { struct callout c_timo_callout; /* timeout callout handle */ + struct callout c_retry_callout; /* retry callout handle */ kcondvar_t c_active; /* somebody actively waiting for xfer */ + int8_t c_slot; /* queue slot # */ #define c_startzero c_chp /* Channel and drive that are to process the request. 
*/ struct ata_channel *c_chp; - uint16_t c_drive; - int8_t c_slot; /* queue slot # */ + uint16_t c_drive; + uint16_t c_retries; /* number of xfer retry */ volatile u_int c_flags; /* command state flags */ void *c_databuf; /* pointer to data buffer */ @@ -146,6 +148,12 @@ struct ata_xfer { int c_skip; /* bytes already transferred */ int c_dscpoll; /* counter for dsc polling (ATAPI) */ int c_lenoff; /* offset to c_bcount (ATAPI) */ +#if 0 /* for now */ + int c_ata_status; /* copy of ATA error + status */ +#endif +#define ATACH_ERR_ST(error, status) ((error) << 8 | (status)) +#define ATACH_ERR(val) (((val) >> 8) & 0xff) +#define ATACH_ST(val) (((val) >> 0) & 0xff) union { struct ata_bio c_bio; /* ATA transfer */ @@ -159,7 +167,6 @@ struct ata_xfer { /* Link on the command queue. */ TAILQ_ENTRY(ata_xfer) c_xferchain; TAILQ_ENTRY(ata_xfer) c_activechain; - STAILQ_ENTRY(ata_xfer) c_restartchain; /* Low-level protocol handlers. */ void (*c_start)(struct ata_channel *, struct ata_xfer *); @@ -177,11 +184,14 @@ struct ata_xfer { #define C_FREE 0x0040 /* call ata_free_xfer() asap */ #define C_PIOBM 0x0080 /* command uses busmastering PIO */ #define C_NCQ 0x0100 /* command is queued */ +#define C_IMMEDIATE 0x0200 /* execute command without queuing */ +#define C_WAITTIMO 0x0400 /* race vs. 
timeout */ /* reasons for c_kill_xfer() */ #define KILL_GONE 1 /* device is gone while xfer was active */ #define KILL_RESET 2 /* xfer was reset */ #define KILL_GONE_INACTIVE 3 /* device is gone while xfer was pending */ +#define KILL_REQUEUE 4 /* xfer must be reissued to device, no err */ /* * While hw supports up to 32 tags, in practice we must never @@ -383,8 +393,10 @@ struct ata_channel { #define ATACH_TH_RUN 0x100 /* the kernel thread is working */ #define ATACH_TH_RESET 0x200 /* someone ask the thread to reset */ #define ATACH_TH_RESCAN 0x400 /* rescan requested */ +#if 1 /* for now */ uint8_t ch_status; /* copy of status register */ uint8_t ch_error; /* copy of error register */ +#endif /* for the reset callback */ int ch_reset_flags; @@ -483,6 +495,9 @@ void atabus_free_drives(struct ata_chann struct ataparams; int ata_get_params(struct ata_drive_datas *, uint8_t, struct ataparams *); int ata_set_mode(struct ata_drive_datas *, uint8_t, uint8_t); +int ata_read_log_ext_ncq(struct ata_drive_datas *, uint8_t, uint8_t *, + uint8_t *, uint8_t *); + /* return code for these cmds */ #define CMD_OK 0 #define CMD_ERR 1 @@ -491,10 +506,10 @@ int ata_set_mode(struct ata_drive_datas struct ata_xfer *ata_get_xfer_ext(struct ata_channel *, bool, int8_t); #define ata_get_xfer(chp) ata_get_xfer_ext((chp), true, 0); void ata_free_xfer(struct ata_channel *, struct ata_xfer *); - void ata_deactivate_xfer(struct ata_channel *, struct ata_xfer *); - void ata_exec_xfer(struct ata_channel *, struct ata_xfer *); + +void ata_timeout(void *); void ata_kill_pending(struct ata_drive_datas *); void ata_kill_active(struct ata_channel *, int, int); void ata_reset_channel(struct ata_channel *, int); @@ -521,6 +536,8 @@ struct ata_xfer * ata_queue_hwslot_to_xfer(struct ata_channel *, int); struct ata_xfer * ata_queue_get_active_xfer(struct ata_channel *); +struct ata_xfer * + ata_queue_drive_active_xfer(struct ata_channel *, int); void ata_delay(int, const char *, int); Index: 
src/sys/dev/ata/satafis_subr.c diff -u src/sys/dev/ata/satafis_subr.c:1.7.28.2 src/sys/dev/ata/satafis_subr.c:1.7.28.3 --- src/sys/dev/ata/satafis_subr.c:1.7.28.2 Wed Apr 19 20:49:17 2017 +++ src/sys/dev/ata/satafis_subr.c Wed Jul 19 19:39:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: satafis_subr.c,v 1.7.28.2 2017/04/19 20:49:17 jdolecek Exp $ */ +/* $NetBSD: satafis_subr.c,v 1.7.28.3 2017/07/19 19:39:28 jdolecek Exp $ */ /*- * Copyright (c) 2009 Jonathan A. Kollasch. @@ -51,7 +51,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: satafis_subr.c,v 1.7.28.2 2017/04/19 20:49:17 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: satafis_subr.c,v 1.7.28.3 2017/07/19 19:39:28 jdolecek Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -160,12 +160,11 @@ satafis_rhd_construct_atapi(struct ata_x } #endif /* NATAPIBUS */ -void +int satafis_rdh_parse(struct ata_channel *chp, const uint8_t *fis) { - chp->ch_status = fis[rdh_status]; - chp->ch_error = fis[rdh_error]; + return ATACH_ERR_ST(fis[rdh_error], fis[rdh_status]); } void Index: src/sys/dev/ata/satafisvar.h diff -u src/sys/dev/ata/satafisvar.h:1.3 src/sys/dev/ata/satafisvar.h:1.3.50.1 --- src/sys/dev/ata/satafisvar.h:1.3 Wed Apr 7 17:51:16 2010 +++ src/sys/dev/ata/satafisvar.h Wed Jul 19 19:39:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: satafisvar.h,v 1.3 2010/04/07 17:51:16 jakllsch Exp $ */ +/* $NetBSD: satafisvar.h,v 1.3.50.1 2017/07/19 19:39:28 jdolecek Exp $ */ /* * Copyright (c) 2009, 2010 Jonathan A. Kollasch. 
@@ -34,7 +34,7 @@ void satafis_rhd_construct_cmd(struct at void satafis_rhd_construct_bio(struct ata_xfer *, uint8_t *); void satafis_rhd_construct_atapi(struct ata_xfer *, uint8_t *); -void satafis_rdh_parse(struct ata_channel *, const uint8_t *); +int satafis_rdh_parse(struct ata_channel *, const uint8_t *); void satafis_rdh_cmd_readreg(struct ata_command *, const uint8_t *); #endif /* _DEV_ATA_FISVAR_H_ */ Index: src/sys/dev/ata/wd.c diff -u src/sys/dev/ata/wd.c:1.428.2.25 src/sys/dev/ata/wd.c:1.428.2.26 --- src/sys/dev/ata/wd.c:1.428.2.25 Mon Jul 3 19:54:44 2017 +++ src/sys/dev/ata/wd.c Wed Jul 19 19:39:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: wd.c,v 1.428.2.25 2017/07/03 19:54:44 jdolecek Exp $ */ +/* $NetBSD: wd.c,v 1.428.2.26 2017/07/19 19:39:28 jdolecek Exp $ */ /* * Copyright (c) 1998, 2001 Manuel Bouyer. All rights reserved. @@ -54,7 +54,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.428.2.25 2017/07/03 19:54:44 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.428.2.26 2017/07/19 19:39:28 jdolecek Exp $"); #include "opt_ata.h" @@ -116,6 +116,7 @@ int wdcdebug_wd_mask = 0x0; #endif #ifdef WD_CHAOS_MONKEY +int wdcdebug_wd_cnt = 200; int wdcdebug_wd_chaos = 0; #endif @@ -198,7 +199,7 @@ void wdgetdefaultlabel(struct wd_softc void wdgetdisklabel(struct wd_softc *); void wdstart(device_t); void wdstart1(struct wd_softc *, struct buf *, struct ata_xfer *); -void wdrestart(void *); +static void wdbiorestart(void *); void wddone(device_t, struct ata_xfer *); static void wd_params_to_properties(struct wd_softc *); int wd_get_params(struct wd_softc *, uint8_t, struct ataparams *); @@ -302,14 +303,11 @@ wdattach(device_t parent, device_t self, wd->sc_dev = self; ATADEBUG_PRINT(("wdattach\n"), DEBUG_FUNCS | DEBUG_PROBE); - callout_init(&wd->sc_restart_ch, 0); - callout_setfunc(&wd->sc_restart_ch, wdrestart, wd); mutex_init(&wd->sc_lock, MUTEX_DEFAULT, IPL_BIO); bufq_alloc(&wd->sc_q, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK); #ifdef 
WD_SOFTBADSECT SLIST_INIT(&wd->sc_bslist); #endif - STAILQ_INIT(&wd->xfer_restart); wd->atabus = adev->adev_bustype; wd->drvp = adev->adev_drv_data; @@ -525,7 +523,6 @@ wddetach(device_t self, int flags) rnd_detach_source(&sc->rnd_source); mutex_destroy(&sc->sc_lock); - callout_destroy(&sc->sc_restart_ch); sc->drvp->drive_type = ATA_DRIVET_NONE; /* no drive any more here */ sc->drvp->drive_flags = 0; @@ -674,7 +671,7 @@ wdstart(device_t self) bp = bufq_get(wd->sc_q); KASSERT(bp != NULL); - xfer->c_bio.retries = 0; + xfer->c_retries = 0; wdstart1(wd, bp, xfer); } @@ -688,17 +685,21 @@ wdstart1(struct wd_softc *wd, struct buf /* must be locked on entry */ KASSERT(mutex_owned(&wd->sc_lock)); + KASSERT(bp == xfer->c_bio.bp || xfer->c_bio.bp == NULL); + KASSERT((xfer->c_flags & (C_WAITACT|C_FREE)) == 0); + + /* Reset state, so that retries don't use stale info */ + if (__predict_false(xfer->c_retries > 0)) { + xfer->c_flags = 0; + memset(&xfer->c_bio, 0, sizeof(xfer->c_bio)); + } + xfer->c_bio.blkno = bp->b_rawblkno; xfer->c_bio.bcount = bp->b_bcount; xfer->c_bio.databuf = bp->b_data; xfer->c_bio.blkdone = 0; - KASSERT(bp == xfer->c_bio.bp || xfer->c_bio.bp == NULL); xfer->c_bio.bp = bp; - /* Reset state flags, so that retries don't use stale info */ - KASSERT((xfer->c_flags & (C_WAITACT|C_FREE)) == 0); - xfer->c_flags = 0; - #ifdef WD_CHAOS_MONKEY /* * Override blkno to be over device capacity to trigger error, @@ -706,7 +707,8 @@ wdstart1(struct wd_softc *wd, struct buf * the command be clipped, or otherwise misinterpreted, by the * driver or controller. 
*/ - if (BUF_ISREAD(bp) && (++wdcdebug_wd_chaos % WD_CHAOS_MONKEY) == 0) { + if (BUF_ISREAD(bp) && xfer->c_retries == 0 && wdcdebug_wd_cnt > 0 && + (++wdcdebug_wd_chaos % wdcdebug_wd_cnt) == 0) { aprint_normal_dev(wd->sc_dev, "%s: chaos xfer %d\n", __func__, xfer->c_slot); xfer->c_bio.blkno = 7777777 + wd->sc_capacity; @@ -718,7 +720,7 @@ wdstart1(struct wd_softc *wd, struct buf * the sector number of the problem, and will eventually allow the * transfer to succeed. */ - if (xfer->c_bio.retries >= WDIORETRIES_SINGLE) + if (xfer->c_retries >= WDIORETRIES_SINGLE) xfer->c_bio.flags = ATA_SINGLE; else xfer->c_bio.flags = 0; @@ -739,7 +741,7 @@ wdstart1(struct wd_softc *wd, struct buf * retrying with NCQ. */ if (wd->drvp->drive_flags & ATA_DRIVE_NCQ && - (xfer->c_bio.retries == 0 || (bp->b_flags & B_MEDIA_FUA))) { + (xfer->c_retries == 0 || (bp->b_flags & B_MEDIA_FUA))) { xfer->c_bio.flags |= ATA_LBA48; xfer->c_flags |= C_NCQ; @@ -785,7 +787,8 @@ wddone(device_t self, struct ata_xfer *x ATADEBUG_PRINT(("wddone %s\n", device_xname(wd->sc_dev)), DEBUG_XFERS); - if (wddoingadump) { + + if (__predict_false(wddoingadump)) { /* just drop it to the floor */ ata_free_xfer(wd->drvp->chnl_softc, xfer); return; @@ -805,6 +808,9 @@ wddone(device_t self, struct ata_xfer *x case TIMEOUT: errmsg = "device timeout"; goto retry; + case REQUEUE: + errmsg = "requeue"; + goto retry2; case ERR_RESET: errmsg = "channel reset"; goto retry2; @@ -822,25 +828,27 @@ retry2: diskerr(bp, "wd", errmsg, LOG_PRINTF, xfer->c_bio.blkdone, wd->sc_dk.dk_label); - if (xfer->c_bio.retries < WDIORETRIES) - printf(", retrying %d", xfer->c_bio.retries + 1); + if (xfer->c_retries < WDIORETRIES) + printf(", slot %d, retry %d", xfer->c_slot, + xfer->c_retries + 1); printf("\n"); if (do_perror) wdperror(wd, xfer); - if (xfer->c_bio.retries < WDIORETRIES) { - xfer->c_bio.retries++; - STAILQ_INSERT_TAIL(&wd->xfer_restart, xfer, - c_restartchain); - - /* - * Only restart the timer if it's not already pending, - * so 
that we wouldn't postpone processing beyond - * original schedule. - */ - if (!callout_pending(&wd->sc_restart_ch)) { - callout_schedule(&wd->sc_restart_ch, - RECOVERYTIME); + + if (xfer->c_retries < WDIORETRIES) { + int timo; + + if (xfer->c_bio.error == REQUEUE) { + /* rerun ASAP, and do not count as retry */ + timo = 1; + } else { + xfer->c_retries++; + timo = RECOVERYTIME; } + + callout_reset(&xfer->c_retry_callout, timo, + wdbiorestart, xfer); + mutex_exit(&wd->sc_lock); return; } @@ -881,7 +889,7 @@ out: bp->b_error = EIO; break; case NOERROR: -noerror: if ((xfer->c_bio.flags & ATA_CORR) || xfer->c_bio.retries > 0) +noerror: if ((xfer->c_bio.flags & ATA_CORR) || xfer->c_retries > 0) aprint_error_dev(wd->sc_dev, "soft error (corrected)\n"); break; @@ -905,27 +913,18 @@ noerror: if ((xfer->c_bio.flags & ATA_CO ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive); } -void -wdrestart(void *v) +static void +wdbiorestart(void *v) { - struct wd_softc *wd = v; - struct ata_xfer *xfer; + struct ata_xfer *xfer = v; + struct buf *bp = xfer->c_bio.bp; + struct wd_softc *wd = device_lookup_private(&wd_cd, WDUNIT(bp->b_dev)); ATADEBUG_PRINT(("wdrestart %s\n", device_xname(wd->sc_dev)), DEBUG_XFERS); - /* - * Resend all failed xfers out immediatelly regardless of original - * schedule, so that we error out reasonably fast in case of massive - * permanent errors. 
- */ mutex_enter(&wd->sc_lock); - while (!STAILQ_EMPTY(&wd->xfer_restart)) { - xfer = STAILQ_FIRST(&wd->xfer_restart); - STAILQ_REMOVE_HEAD(&wd->xfer_restart, c_restartchain); - - wdstart1(v, xfer->c_bio.bp, xfer); - } + wdstart1(wd, bp, xfer); mutex_exit(&wd->sc_lock); } Index: src/sys/dev/ata/wdvar.h diff -u src/sys/dev/ata/wdvar.h:1.43.4.6 src/sys/dev/ata/wdvar.h:1.43.4.7 --- src/sys/dev/ata/wdvar.h:1.43.4.6 Fri Jun 23 20:40:51 2017 +++ src/sys/dev/ata/wdvar.h Wed Jul 19 19:39:28 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: wdvar.h,v 1.43.4.6 2017/06/23 20:40:51 jdolecek Exp $ */ +/* $NetBSD: wdvar.h,v 1.43.4.7 2017/07/19 19:39:28 jdolecek Exp $ */ /* * Copyright (c) 1998, 2001 Manuel Bouyer. @@ -38,7 +38,6 @@ struct wd_softc { device_t sc_dev; struct disk sc_dk; struct bufq_state *sc_q; - struct callout sc_restart_ch; kmutex_t sc_lock; int sc_quirks; /* any quirks drive might have */ @@ -69,8 +68,6 @@ struct wd_softc { u_int sc_bscount; #endif krndsource_t rnd_source; - - STAILQ_HEAD(, ata_xfer) xfer_restart; }; #endif /* _DEV_ATA_WDVAR_H_ */