Module Name:    src
Committed By:   jdolecek
Date:           Wed Jul 19 19:39:28 UTC 2017

Modified Files:
        src/sys/dev/ata [jdolecek-ncq]: ata.c atareg.h atavar.h satafis_subr.c
            satafisvar.h wd.c wdvar.h

Log Message:
tighten and expand error handling, mostly for NCQ use cases:
- make retry timeout callout per xfer, i.e. retry separately
- zero whole bio struct on retry to avoid more stale state
- add a REQUEUE option, which doesn't bump retry count
- add ata_read_log_ext_ncq() for NCQ recovery
- adjust logic for activating xfers - allow the next command only when
  it's for the same drive; commands to several drives concurrently are
  only supported when the HBA and driver support FIS-based switching
- add new ata_timeout() which handles race between callout_stop()
  and the invocation, add appropriate handling on deactivate/free paths
- stop using ch_status/ch_error in non-wdc code; later it will be dropped
  completely


To generate a diff of this commit:
cvs rdiff -u -r1.132.8.18 -r1.132.8.19 src/sys/dev/ata/ata.c
cvs rdiff -u -r1.43.18.2 -r1.43.18.3 src/sys/dev/ata/atareg.h
cvs rdiff -u -r1.92.8.16 -r1.92.8.17 src/sys/dev/ata/atavar.h
cvs rdiff -u -r1.7.28.2 -r1.7.28.3 src/sys/dev/ata/satafis_subr.c
cvs rdiff -u -r1.3 -r1.3.50.1 src/sys/dev/ata/satafisvar.h
cvs rdiff -u -r1.428.2.25 -r1.428.2.26 src/sys/dev/ata/wd.c
cvs rdiff -u -r1.43.4.6 -r1.43.4.7 src/sys/dev/ata/wdvar.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/ata/ata.c
diff -u src/sys/dev/ata/ata.c:1.132.8.18 src/sys/dev/ata/ata.c:1.132.8.19
--- src/sys/dev/ata/ata.c:1.132.8.18	Tue Jun 27 18:36:03 2017
+++ src/sys/dev/ata/ata.c	Wed Jul 19 19:39:28 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: ata.c,v 1.132.8.18 2017/06/27 18:36:03 jdolecek Exp $	*/
+/*	$NetBSD: ata.c,v 1.132.8.19 2017/07/19 19:39:28 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ata.c,v 1.132.8.18 2017/06/27 18:36:03 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ata.c,v 1.132.8.19 2017/07/19 19:39:28 jdolecek Exp $");
 
 #include "opt_ata.h"
 
@@ -241,14 +241,34 @@ ata_queue_get_active_xfer(struct ata_cha
 	return xfer;
 }
 
+struct ata_xfer *
+ata_queue_drive_active_xfer(struct ata_channel *chp, int drive)
+{
+	struct ata_xfer *xfer = NULL;
+
+	mutex_enter(&chp->ch_lock);
+
+	TAILQ_FOREACH(xfer, &chp->ch_queue->active_xfers, c_activechain) {
+		if (xfer->c_drive == drive)
+			break;
+	}
+	KASSERT(xfer != NULL);
+
+	mutex_exit(&chp->ch_lock);
+
+	return xfer;
+}
+
 static void
-ata_xfer_init(struct ata_xfer *xfer, bool zero)
+ata_xfer_init(struct ata_xfer *xfer, uint8_t slot)
 {
-	if (zero)
-		memset(xfer, 0, sizeof(*xfer));
+	memset(xfer, 0, sizeof(*xfer));
+
+	xfer->c_slot = slot;
 
 	cv_init(&xfer->c_active, "ataact");
 	callout_init(&xfer->c_timo_callout, 0); 	/* XXX MPSAFE */
+	callout_init(&xfer->c_retry_callout, 0); 	/* XXX MPSAFE */
 }
 
 static void
@@ -256,6 +276,8 @@ ata_xfer_destroy(struct ata_xfer *xfer)
 {
 	callout_halt(&xfer->c_timo_callout, NULL);	/* XXX MPSAFE */
 	callout_destroy(&xfer->c_timo_callout);
+	callout_halt(&xfer->c_retry_callout, NULL);	/* XXX MPSAFE */
+	callout_destroy(&xfer->c_retry_callout);
 	cv_destroy(&xfer->c_active);
 }
 
@@ -278,7 +300,7 @@ ata_queue_alloc(uint8_t openings)
 	cv_init(&chq->queue_drain, "atdrn");
 
 	for (uint8_t i = 0; i < openings; i++)
-		ata_xfer_init(&chq->queue_xfers[i], false);
+		ata_xfer_init(&chq->queue_xfers[i], i);
 
 	return chq;
 }
@@ -1009,6 +1031,88 @@ out:
 	return rv;
 }
 
+int
+ata_read_log_ext_ncq(struct ata_drive_datas *drvp, uint8_t flags,
+    uint8_t *slot, uint8_t *status, uint8_t *err)
+{
+	struct ata_xfer *xfer;
+	int rv;
+	struct ata_channel *chp = drvp->chnl_softc;
+	struct atac_softc *atac = chp->ch_atac;
+	uint8_t *tb;
+
+	ATADEBUG_PRINT(("%s\n", __func__), DEBUG_FUNCS);
+
+	/* Only NCQ ATA drives support/need this */
+	if (drvp->drive_type != ATA_DRIVET_ATA ||
+	    (drvp->drive_flags & ATA_DRIVE_NCQ) == 0)
+		return EOPNOTSUPP;
+
+	xfer = ata_get_xfer_ext(chp, false, 0);
+	if (xfer == NULL) {
+		ATADEBUG_PRINT(("%s: no xfer\n", __func__),
+		    DEBUG_FUNCS|DEBUG_XFERS);
+		return EAGAIN;
+	}
+
+	tb = malloc(DEV_BSIZE, M_DEVBUF, M_NOWAIT);
+	if (tb == NULL) {
+		ATADEBUG_PRINT(("%s: memory allocation failed\n", __func__),
+		    DEBUG_FUNCS|DEBUG_XFERS);
+		rv = EAGAIN;
+		goto out;
+	}
+	memset(tb, 0, DEV_BSIZE);
+
+	/*
+	 * We could use READ LOG DMA EXT if drive supports it (i.e.
+	 * when it supports Streaming feature) to avoid PIO command,
+	 * and to make this a little faster. Realistically, it
+	 * should not matter.
+	 */
+	xfer->c_flags |= C_IMMEDIATE;
+	xfer->c_ata_c.r_command = WDCC_READ_LOG_EXT;
+	xfer->c_ata_c.r_lba = WDCC_LOG_PAGE_NCQ;
+	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
+	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
+	xfer->c_ata_c.r_count = 1;
+	xfer->c_ata_c.r_device = WDSD_LBA;
+	xfer->c_ata_c.flags = AT_READ | AT_LBA | flags;
+	xfer->c_ata_c.timeout = 1000; /* 1s */
+	xfer->c_ata_c.data = tb;
+	xfer->c_ata_c.bcount = DEV_BSIZE;
+
+	if ((*atac->atac_bustype_ata->ata_exec_command)(drvp,
+						xfer) != ATACMD_COMPLETE) {
+		rv = EAGAIN;
+		goto out2;
+	}
+	if (xfer->c_ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
+		rv = EINVAL;
+		goto out2;
+	}
+
+	/* XXX verify checksum and refuse if not correct (QEMU) */
+
+	if (tb[0] & WDCC_LOG_NQ) {
+		/* not a NCQ command */
+		rv = EOPNOTSUPP;
+		goto out2;
+	}
+
+	*slot = tb[0] & 0x1f;
+	*status = tb[2];
+	*err = tb[3];
+
+	rv = 0;
+
+out2:
+	free(tb, DEV_BSIZE);
+out:
+	ata_free_xfer(chp, xfer);
+	return rv;
+}
+
 #if NATA_DMA
 void
 ata_dmaerr(struct ata_drive_datas *drvp, int flags)
@@ -1067,8 +1171,13 @@ ata_exec_xfer(struct ata_channel *chp, s
 
 	mutex_enter(&chp->ch_lock);
 
-	/* insert at the end of command list */
-	TAILQ_INSERT_TAIL(&chp->ch_queue->queue_xfer, xfer, c_xferchain);
+	/* insert at the end of command list unless specially requested */
+	if (xfer->c_flags & C_IMMEDIATE)
+		TAILQ_INSERT_HEAD(&chp->ch_queue->queue_xfer, xfer,
+		    c_xferchain);
+	else
+		TAILQ_INSERT_TAIL(&chp->ch_queue->queue_xfer, xfer,
+		    c_xferchain);
 	ATADEBUG_PRINT(("atastart from ata_exec_xfer, flags 0x%x\n",
 	    chp->ch_flags), DEBUG_XFERS);
 
@@ -1086,7 +1195,7 @@ ata_exec_xfer(struct ata_channel *chp, s
 			 * Free xfer now if it there was attempt to free it
 			 * while we were waiting.
 			 */
-			if (xfer->c_flags & C_FREE) {
+			if ((xfer->c_flags & (C_FREE|C_WAITTIMO)) == C_FREE) {
 				ata_free_xfer(chp, xfer);
 				return;
 			}
@@ -1111,6 +1220,7 @@ atastart(struct ata_channel *chp)
 	struct atac_softc *atac = chp->ch_atac;
 	struct ata_queue *chq = chp->ch_queue;
 	struct ata_xfer *xfer, *axfer;
+	bool immediate;
 
 #ifdef ATA_DEBUG
 	int spl1, spl2;
@@ -1127,12 +1237,19 @@ atastart(struct ata_channel *chp)
 
 	mutex_enter(&chp->ch_lock);
 
+	KASSERT(chq->queue_active <= chq->queue_openings);
 	if (chq->queue_active == chq->queue_openings) {
 		goto out; /* channel completely busy */
 	}
 
+	/* is there a xfer ? */
+	if ((xfer = TAILQ_FIRST(&chp->ch_queue->queue_xfer)) == NULL)
+		goto out;
+
+	immediate = ISSET(xfer->c_flags, C_IMMEDIATE);
+
 	/* is the queue frozen? */
-	if (__predict_false(chq->queue_freeze > 0)) {
+	if (__predict_false(!immediate && chq->queue_freeze > 0)) {
 		if (chq->queue_flags & QF_IDLE_WAIT) {
 			chq->queue_flags &= ~QF_IDLE_WAIT;
 			wakeup(&chq->queue_flags);
@@ -1140,21 +1257,23 @@ atastart(struct ata_channel *chp)
 		goto out; /* queue frozen */
 	}
 
-	/* is there a xfer ? */
-	if ((xfer = TAILQ_FIRST(&chp->ch_queue->queue_xfer)) == NULL)
-		goto out;
-
 	/* all xfers on same queue must belong to the same channel */
 	KASSERT(xfer->c_chp == chp);
 
 	/*
-	 * Can only take NCQ command if there are no current active
-	 * commands, or if the active commands are NCQ. Need only check
-	 * first xfer.
-	 */
-	axfer = TAILQ_FIRST(&chp->ch_queue->active_xfers);
-	if (axfer && (axfer->c_flags & C_NCQ) == 0)
-		goto out;
+	 * Can only take the command if there are no current active
+	 * commands, or if the command is NCQ and the active commands are also
+	 * NCQ. If PM is in use and HBA driver doesn't support/use FIS-based
+	 * switching, can only send commands to single drive.
+	 * Need only check first xfer.
+	 * XXX FIS-based switching - revisit
+	 */
+	if (!immediate && (axfer = TAILQ_FIRST(&chp->ch_queue->active_xfers))) {
+		if (!ISSET(xfer->c_flags, C_NCQ) ||
+		    !ISSET(axfer->c_flags, C_NCQ) ||
+		    xfer->c_drive != axfer->c_drive)
+			goto out;
+	}
 
 	struct ata_drive_datas * const drvp = &chp->ch_drive[xfer->c_drive];
 
@@ -1170,11 +1289,6 @@ atastart(struct ata_channel *chp)
 		goto out;
 	}
 
-#ifdef DIAGNOSTIC
-	if ((chp->ch_flags & ATACH_IRQ_WAIT) != 0
-	    && chp->ch_queue->queue_openings == 1)
-		panic("atastart: channel waiting for irq");
-#endif
 	ATADEBUG_PRINT(("atastart: xfer %p channel %d drive %d\n", xfer,
 	    chp->ch_channel, xfer->c_drive), DEBUG_XFERS);
 	if (drvp->drive_flags & ATA_DRIVE_RESET) {
@@ -1242,7 +1356,6 @@ retry:
 	/* zero everything after the callout member */
 	memset(&xfer->c_startzero, 0,
 	    sizeof(struct ata_xfer) - offsetof(struct ata_xfer, c_startzero));
-	xfer->c_slot = slot;
 
 out:
 	mutex_exit(&chp->ch_lock);
@@ -1259,7 +1372,7 @@ ata_free_xfer(struct ata_channel *chp, s
 
 	mutex_enter(&chp->ch_lock);
 
-	if (xfer->c_flags & C_WAITACT) {
+	if (xfer->c_flags & (C_WAITACT|C_WAITTIMO)) {
 		/* Someone is waiting for this xfer, so we can't free now */
 		xfer->c_flags |= C_FREE;
 		cv_signal(&xfer->c_active);
@@ -1318,6 +1431,9 @@ ata_deactivate_xfer(struct ata_channel *
 
 	callout_stop(&xfer->c_timo_callout);
 
+	if (callout_invoking(&xfer->c_timo_callout))
+		xfer->c_flags |= C_WAITTIMO;
+
 	TAILQ_REMOVE(&chq->active_xfers, xfer, c_activechain);
 	chq->active_xfers_used &= ~__BIT(xfer->c_slot);
 	chq->queue_active--;
@@ -1359,6 +1475,76 @@ ata_waitdrain_xfer_check(struct ata_chan
 }
 
 /*
+ * Check for race of normal transfer handling vs. timeout.
+ */
+static bool
+ata_timo_xfer_check(struct ata_xfer *xfer)
+{
+	struct ata_channel *chp = xfer->c_chp;
+	struct ata_drive_datas *drvp = &chp->ch_drive[xfer->c_drive];
+
+	mutex_enter(&chp->ch_lock);
+
+	callout_ack(&xfer->c_timo_callout);
+
+	if (xfer->c_flags & C_WAITTIMO) {
+		xfer->c_flags &= ~C_WAITTIMO;
+
+		/* Handle race vs. ata_free_xfer() */
+		if (xfer->c_flags & C_FREE) {
+			xfer->c_flags &= ~C_FREE;
+			mutex_exit(&chp->ch_lock);
+
+	    		aprint_normal_dev(drvp->drv_softc,
+			    "xfer %d freed while invoking timeout\n",
+			    xfer->c_slot); 
+
+			ata_free_xfer(chp, xfer);
+			return true;
+		}
+
+		/* Handle race vs. callout_stop() in ata_deactivate_xfer() */
+		if (!callout_expired(&xfer->c_timo_callout)) {
+			mutex_exit(&chp->ch_lock);
+
+	    		aprint_normal_dev(drvp->drv_softc,
+			    "xfer %d deactivated while invoking timeout\n",
+			    xfer->c_slot); 
+			return true;
+		}
+	}
+
+	mutex_exit(&chp->ch_lock);
+
+	/* No race, proceed with timeout handling */
+	return false;
+}
+
+void
+ata_timeout(void *v)
+{
+	struct ata_xfer *xfer = v;
+	int s;
+
+	ATADEBUG_PRINT(("%s: slot %d\n", __func__, xfer->c_slot),
+	    DEBUG_FUNCS|DEBUG_XFERS);
+
+	s = splbio();				/* XXX MPSAFE */
+
+	if (ata_timo_xfer_check(xfer)) {
+		/* Already logged */
+		goto out;
+	}
+
+	/* Mark as timed out. Do not print anything, wd(4) will. */
+	xfer->c_flags |= C_TIMEOU;
+	xfer->c_intr(xfer->c_chp, xfer, 0);
+
+out:
+	splx(s);
+}
+
+/*
  * Kill off all active xfers for a ata_channel.
  *
  * Must be called at splbio().
@@ -1497,12 +1683,12 @@ ata_reset_channel(struct ata_channel *ch
 		chp->ch_drive[drive].state = 0;
 
 	chp->ch_flags &= ~ATACH_TH_RESET;
-	if ((flags & AT_RST_EMERG) == 0)  {
-		ata_channel_thaw(chp);
-		atastart(chp);
-	} else {
+	if (flags & AT_RST_EMERG) {
 		/* make sure that we can use polled commands */
 		ata_queue_reset(chp->ch_queue);
+	} else {
+		ata_channel_thaw(chp);
+		atastart(chp);
 	}
 }
 

Index: src/sys/dev/ata/atareg.h
diff -u src/sys/dev/ata/atareg.h:1.43.18.2 src/sys/dev/ata/atareg.h:1.43.18.3
--- src/sys/dev/ata/atareg.h:1.43.18.2	Mon Apr 24 22:20:23 2017
+++ src/sys/dev/ata/atareg.h	Wed Jul 19 19:39:28 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: atareg.h,v 1.43.18.2 2017/04/24 22:20:23 jdolecek Exp $	*/
+/*	$NetBSD: atareg.h,v 1.43.18.3 2017/07/19 19:39:28 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.
@@ -94,10 +94,17 @@
 #define	WDCC_RECAL		0x10	/* disk restore code -- resets cntlr */
 
 #define	WDCC_READ		0x20	/* disk read code */
+
+#define	WDCC_READ_LOG_EXT	0x2f
+#define	 WDCC_LOG_PAGE_NCQ	0x10
+#define	 WDCC_LOG_NQ		__BIT(7)
+
 #define	WDCC_WRITE		0x30	/* disk write code */
 #define	 WDCC__LONG		 0x02	/* modifier -- access ecc bytes */
 #define	 WDCC__NORETRY		 0x01	/* modifier -- no retrys */
 
+#define	WDCC_READ_LOG_DMA_EXT	0x47	/* DMA variant of READ_LOG_EXT */
+
 #define	WDCC_FORMAT		0x50	/* disk format code */
 #define	WDCC_DIAGNOSE		0x90	/* controller diagnostic */
 #define	WDCC_IDP		0x91	/* initialize drive parameters */

Index: src/sys/dev/ata/atavar.h
diff -u src/sys/dev/ata/atavar.h:1.92.8.16 src/sys/dev/ata/atavar.h:1.92.8.17
--- src/sys/dev/ata/atavar.h:1.92.8.16	Tue Jun 27 18:36:03 2017
+++ src/sys/dev/ata/atavar.h	Wed Jul 19 19:39:28 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: atavar.h,v 1.92.8.16 2017/06/27 18:36:03 jdolecek Exp $	*/
+/*	$NetBSD: atavar.h,v 1.92.8.17 2017/07/19 19:39:28 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.
@@ -65,8 +65,8 @@ struct ata_bio {
 #define	TIMEOUT		4	/* device timed out */
 #define	ERR_NODEV	5	/* device has been gone */
 #define ERR_RESET	6	/* command was terminated by channel reset */
+#define REQUEUE		7	/* different xfer failed, requeue command */
 	uint8_t		r_error;/* copy of error register */
-	int		retries;/* number of xfer retry */
 	struct buf	*bp;
 };
 
@@ -132,13 +132,15 @@ struct scsipi_xfer;
  */
 struct ata_xfer {
 	struct callout c_timo_callout;	/* timeout callout handle */
+	struct callout c_retry_callout;	/* retry callout handle */
 	kcondvar_t c_active;		/* somebody actively waiting for xfer */
+	int8_t c_slot;			/* queue slot # */
 
 #define c_startzero	c_chp
 	/* Channel and drive that are to process the request. */
 	struct ata_channel *c_chp;
-	uint16_t c_drive;
-	int8_t c_slot;			/* queue slot # */
+	uint16_t	c_drive;
+	uint16_t	c_retries;	/* number of xfer retry */
 
 	volatile u_int c_flags;		/* command state flags */
 	void	*c_databuf;		/* pointer to data buffer */
@@ -146,6 +148,12 @@ struct ata_xfer {
 	int	c_skip;			/* bytes already transferred */
 	int	c_dscpoll;		/* counter for dsc polling (ATAPI) */
 	int	c_lenoff;		/* offset to c_bcount (ATAPI) */
+#if 0 /* for now */
+	int	c_ata_status;		/* copy of ATA error + status */
+#endif
+#define ATACH_ERR_ST(error, status)	((error) << 8 | (status))
+#define ATACH_ERR(val)			(((val) >> 8) & 0xff)
+#define ATACH_ST(val)			(((val) >> 0) & 0xff)
 
 	union {
 		struct ata_bio	c_bio;		/* ATA transfer */
@@ -159,7 +167,6 @@ struct ata_xfer {
 	/* Link on the command queue. */
 	TAILQ_ENTRY(ata_xfer) c_xferchain;
 	TAILQ_ENTRY(ata_xfer) c_activechain;
-	STAILQ_ENTRY(ata_xfer) c_restartchain;
 
 	/* Low-level protocol handlers. */
 	void	(*c_start)(struct ata_channel *, struct ata_xfer *);
@@ -177,11 +184,14 @@ struct ata_xfer {
 #define C_FREE		0x0040		/* call ata_free_xfer() asap */
 #define C_PIOBM		0x0080		/* command uses busmastering PIO */
 #define	C_NCQ		0x0100		/* command is queued  */
+#define C_IMMEDIATE	0x0200		/* execute command without queuing */
+#define C_WAITTIMO	0x0400		/* race vs. timeout */
 
 /* reasons for c_kill_xfer() */
 #define KILL_GONE 1		/* device is gone while xfer was active */
 #define KILL_RESET 2		/* xfer was reset */
 #define KILL_GONE_INACTIVE 3	/* device is gone while xfer was pending */
+#define KILL_REQUEUE	4	/* xfer must be reissued to device, no err */
 
 /*
  * While hw supports up to 32 tags, in practice we must never
@@ -383,8 +393,10 @@ struct ata_channel {
 #define ATACH_TH_RUN   0x100	/* the kernel thread is working */
 #define ATACH_TH_RESET 0x200	/* someone ask the thread to reset */
 #define ATACH_TH_RESCAN 0x400	/* rescan requested */
+#if 1 /* for now */
 	uint8_t ch_status;	/* copy of status register */
 	uint8_t ch_error;	/* copy of error register */
+#endif
 
 	/* for the reset callback */
 	int ch_reset_flags;
@@ -483,6 +495,9 @@ void	atabus_free_drives(struct ata_chann
 struct ataparams;
 int	ata_get_params(struct ata_drive_datas *, uint8_t, struct ataparams *);
 int	ata_set_mode(struct ata_drive_datas *, uint8_t, uint8_t);
+int	ata_read_log_ext_ncq(struct ata_drive_datas *, uint8_t, uint8_t *,
+    uint8_t *, uint8_t *);
+
 /* return code for these cmds */
 #define CMD_OK    0
 #define CMD_ERR   1
@@ -491,10 +506,10 @@ int	ata_set_mode(struct ata_drive_datas 
 struct ata_xfer *ata_get_xfer_ext(struct ata_channel *, bool, int8_t);
 #define ata_get_xfer(chp) ata_get_xfer_ext((chp), true, 0);
 void	ata_free_xfer(struct ata_channel *, struct ata_xfer *);
-
 void	ata_deactivate_xfer(struct ata_channel *, struct ata_xfer *);
-
 void	ata_exec_xfer(struct ata_channel *, struct ata_xfer *);
+
+void	ata_timeout(void *);
 void	ata_kill_pending(struct ata_drive_datas *);
 void	ata_kill_active(struct ata_channel *, int, int);
 void	ata_reset_channel(struct ata_channel *, int);
@@ -521,6 +536,8 @@ struct ata_xfer *
 	ata_queue_hwslot_to_xfer(struct ata_channel *, int);
 struct ata_xfer *
 	ata_queue_get_active_xfer(struct ata_channel *);
+struct ata_xfer *
+	ata_queue_drive_active_xfer(struct ata_channel *, int);
 
 void	ata_delay(int, const char *, int);
 

Index: src/sys/dev/ata/satafis_subr.c
diff -u src/sys/dev/ata/satafis_subr.c:1.7.28.2 src/sys/dev/ata/satafis_subr.c:1.7.28.3
--- src/sys/dev/ata/satafis_subr.c:1.7.28.2	Wed Apr 19 20:49:17 2017
+++ src/sys/dev/ata/satafis_subr.c	Wed Jul 19 19:39:28 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: satafis_subr.c,v 1.7.28.2 2017/04/19 20:49:17 jdolecek Exp $ */
+/* $NetBSD: satafis_subr.c,v 1.7.28.3 2017/07/19 19:39:28 jdolecek Exp $ */
 
 /*-
  * Copyright (c) 2009 Jonathan A. Kollasch.
@@ -51,7 +51,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: satafis_subr.c,v 1.7.28.2 2017/04/19 20:49:17 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: satafis_subr.c,v 1.7.28.3 2017/07/19 19:39:28 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -160,12 +160,11 @@ satafis_rhd_construct_atapi(struct ata_x
 }
 #endif /* NATAPIBUS */
 
-void
+int
 satafis_rdh_parse(struct ata_channel *chp, const uint8_t *fis)
 {
 
-	chp->ch_status = fis[rdh_status];
-	chp->ch_error = fis[rdh_error];
+	return ATACH_ERR_ST(fis[rdh_error], fis[rdh_status]);
 }
 
 void

Index: src/sys/dev/ata/satafisvar.h
diff -u src/sys/dev/ata/satafisvar.h:1.3 src/sys/dev/ata/satafisvar.h:1.3.50.1
--- src/sys/dev/ata/satafisvar.h:1.3	Wed Apr  7 17:51:16 2010
+++ src/sys/dev/ata/satafisvar.h	Wed Jul 19 19:39:28 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: satafisvar.h,v 1.3 2010/04/07 17:51:16 jakllsch Exp $ */
+/* $NetBSD: satafisvar.h,v 1.3.50.1 2017/07/19 19:39:28 jdolecek Exp $ */
 
 /*
  * Copyright (c) 2009, 2010 Jonathan A. Kollasch.
@@ -34,7 +34,7 @@ void satafis_rhd_construct_cmd(struct at
 void satafis_rhd_construct_bio(struct ata_xfer *, uint8_t *);
 void satafis_rhd_construct_atapi(struct ata_xfer *, uint8_t *);
 
-void satafis_rdh_parse(struct ata_channel *, const uint8_t *);
+int satafis_rdh_parse(struct ata_channel *, const uint8_t *);
 void satafis_rdh_cmd_readreg(struct ata_command *, const uint8_t *);
 
 #endif /* _DEV_ATA_FISVAR_H_ */

Index: src/sys/dev/ata/wd.c
diff -u src/sys/dev/ata/wd.c:1.428.2.25 src/sys/dev/ata/wd.c:1.428.2.26
--- src/sys/dev/ata/wd.c:1.428.2.25	Mon Jul  3 19:54:44 2017
+++ src/sys/dev/ata/wd.c	Wed Jul 19 19:39:28 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: wd.c,v 1.428.2.25 2017/07/03 19:54:44 jdolecek Exp $ */
+/*	$NetBSD: wd.c,v 1.428.2.26 2017/07/19 19:39:28 jdolecek Exp $ */
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
@@ -54,7 +54,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.428.2.25 2017/07/03 19:54:44 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.428.2.26 2017/07/19 19:39:28 jdolecek Exp $");
 
 #include "opt_ata.h"
 
@@ -116,6 +116,7 @@ int wdcdebug_wd_mask = 0x0;
 #endif
 
 #ifdef WD_CHAOS_MONKEY
+int wdcdebug_wd_cnt = 200;
 int wdcdebug_wd_chaos = 0;
 #endif
 
@@ -198,7 +199,7 @@ void  wdgetdefaultlabel(struct wd_softc 
 void  wdgetdisklabel(struct wd_softc *);
 void  wdstart(device_t);
 void  wdstart1(struct wd_softc *, struct buf *, struct ata_xfer *);
-void  wdrestart(void *);
+static void  wdbiorestart(void *);
 void  wddone(device_t, struct ata_xfer *);
 static void wd_params_to_properties(struct wd_softc *);
 int   wd_get_params(struct wd_softc *, uint8_t, struct ataparams *);
@@ -302,14 +303,11 @@ wdattach(device_t parent, device_t self,
 	wd->sc_dev = self;
 
 	ATADEBUG_PRINT(("wdattach\n"), DEBUG_FUNCS | DEBUG_PROBE);
-	callout_init(&wd->sc_restart_ch, 0);
-	callout_setfunc(&wd->sc_restart_ch, wdrestart, wd);
 	mutex_init(&wd->sc_lock, MUTEX_DEFAULT, IPL_BIO);
 	bufq_alloc(&wd->sc_q, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);
 #ifdef WD_SOFTBADSECT
 	SLIST_INIT(&wd->sc_bslist);
 #endif
-	STAILQ_INIT(&wd->xfer_restart);
 	wd->atabus = adev->adev_bustype;
 	wd->drvp = adev->adev_drv_data;
 
@@ -525,7 +523,6 @@ wddetach(device_t self, int flags)
 	rnd_detach_source(&sc->rnd_source);
 
 	mutex_destroy(&sc->sc_lock);
-	callout_destroy(&sc->sc_restart_ch);
 
 	sc->drvp->drive_type = ATA_DRIVET_NONE; /* no drive any more here */
 	sc->drvp->drive_flags = 0;
@@ -674,7 +671,7 @@ wdstart(device_t self)
 		bp = bufq_get(wd->sc_q);
 		KASSERT(bp != NULL);
 
-		xfer->c_bio.retries = 0;
+		xfer->c_retries = 0;
 		wdstart1(wd, bp, xfer);
 	}
 
@@ -688,17 +685,21 @@ wdstart1(struct wd_softc *wd, struct buf
 	/* must be locked on entry */
 	KASSERT(mutex_owned(&wd->sc_lock));
 
+	KASSERT(bp == xfer->c_bio.bp || xfer->c_bio.bp == NULL);
+	KASSERT((xfer->c_flags & (C_WAITACT|C_FREE)) == 0);
+
+	/* Reset state, so that retries don't use stale info */
+	if (__predict_false(xfer->c_retries > 0)) {
+		xfer->c_flags = 0;
+		memset(&xfer->c_bio, 0, sizeof(xfer->c_bio));
+	}
+
 	xfer->c_bio.blkno = bp->b_rawblkno;
 	xfer->c_bio.bcount = bp->b_bcount;
 	xfer->c_bio.databuf = bp->b_data;
 	xfer->c_bio.blkdone = 0;
-	KASSERT(bp == xfer->c_bio.bp || xfer->c_bio.bp == NULL);
 	xfer->c_bio.bp = bp;
 
-	/* Reset state flags, so that retries don't use stale info */
-	KASSERT((xfer->c_flags & (C_WAITACT|C_FREE)) == 0);
-	xfer->c_flags = 0;
-
 #ifdef WD_CHAOS_MONKEY
 	/*
 	 * Override blkno to be over device capacity to trigger error,
@@ -706,7 +707,8 @@ wdstart1(struct wd_softc *wd, struct buf
 	 * the command be clipped, or otherwise misinterpreted, by the
 	 * driver or controller.
 	 */
-	if (BUF_ISREAD(bp) && (++wdcdebug_wd_chaos % WD_CHAOS_MONKEY) == 0) {
+	if (BUF_ISREAD(bp) && xfer->c_retries == 0 && wdcdebug_wd_cnt > 0 &&
+	    (++wdcdebug_wd_chaos % wdcdebug_wd_cnt) == 0) {
 		aprint_normal_dev(wd->sc_dev, "%s: chaos xfer %d\n",
 		    __func__, xfer->c_slot);
 		xfer->c_bio.blkno = 7777777 + wd->sc_capacity;
@@ -718,7 +720,7 @@ wdstart1(struct wd_softc *wd, struct buf
 	 * the sector number of the problem, and will eventually allow the
 	 * transfer to succeed.
 	 */
-	if (xfer->c_bio.retries >= WDIORETRIES_SINGLE)
+	if (xfer->c_retries >= WDIORETRIES_SINGLE)
 		xfer->c_bio.flags = ATA_SINGLE;
 	else
 		xfer->c_bio.flags = 0;
@@ -739,7 +741,7 @@ wdstart1(struct wd_softc *wd, struct buf
 	 * retrying with NCQ.
 	 */
 	if (wd->drvp->drive_flags & ATA_DRIVE_NCQ &&
-	    (xfer->c_bio.retries == 0 || (bp->b_flags & B_MEDIA_FUA))) {
+	    (xfer->c_retries == 0 || (bp->b_flags & B_MEDIA_FUA))) {
 		xfer->c_bio.flags |= ATA_LBA48;
 		xfer->c_flags |= C_NCQ;
 
@@ -785,7 +787,8 @@ wddone(device_t self, struct ata_xfer *x
 
 	ATADEBUG_PRINT(("wddone %s\n", device_xname(wd->sc_dev)),
 	    DEBUG_XFERS);
-	if (wddoingadump) {
+
+	if (__predict_false(wddoingadump)) {
 		/* just drop it to the floor */
 		ata_free_xfer(wd->drvp->chnl_softc, xfer);
 		return;
@@ -805,6 +808,9 @@ wddone(device_t self, struct ata_xfer *x
 	case TIMEOUT:
 		errmsg = "device timeout";
 		goto retry;
+	case REQUEUE:
+		errmsg = "requeue";
+		goto retry2;
 	case ERR_RESET:
 		errmsg = "channel reset";
 		goto retry2;
@@ -822,25 +828,27 @@ retry2:
 
 		diskerr(bp, "wd", errmsg, LOG_PRINTF,
 		    xfer->c_bio.blkdone, wd->sc_dk.dk_label);
-		if (xfer->c_bio.retries < WDIORETRIES)
-			printf(", retrying %d", xfer->c_bio.retries + 1);
+		if (xfer->c_retries < WDIORETRIES)
+			printf(", slot %d, retry %d", xfer->c_slot,
+			    xfer->c_retries + 1);
 		printf("\n");
 		if (do_perror)
 			wdperror(wd, xfer);
-		if (xfer->c_bio.retries < WDIORETRIES) {
-			xfer->c_bio.retries++;
-			STAILQ_INSERT_TAIL(&wd->xfer_restart, xfer,
-			    c_restartchain);
-
-			/*
-			 * Only restart the timer if it's not already pending,
-			 * so that we wouldn't postpone processing beyond
-			 * original schedule.
-			 */
-			if (!callout_pending(&wd->sc_restart_ch)) {
-				callout_schedule(&wd->sc_restart_ch,
-				    RECOVERYTIME);
+
+		if (xfer->c_retries < WDIORETRIES) {
+			int timo;
+
+			if (xfer->c_bio.error == REQUEUE) {
+				/* rerun ASAP, and do not count as retry */
+				timo = 1;
+			} else {
+				xfer->c_retries++;
+				timo = RECOVERYTIME;
 			}
+
+			callout_reset(&xfer->c_retry_callout, timo,
+			    wdbiorestart, xfer);
+
 			mutex_exit(&wd->sc_lock);
 			return;
 		}
@@ -881,7 +889,7 @@ out:
 		bp->b_error = EIO;
 		break;
 	case NOERROR:
-noerror:	if ((xfer->c_bio.flags & ATA_CORR) || xfer->c_bio.retries > 0)
+noerror:	if ((xfer->c_bio.flags & ATA_CORR) || xfer->c_retries > 0)
 			aprint_error_dev(wd->sc_dev,
 			    "soft error (corrected)\n");
 		break;
@@ -905,27 +913,18 @@ noerror:	if ((xfer->c_bio.flags & ATA_CO
 	ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
 }
 
-void
-wdrestart(void *v)
+static void
+wdbiorestart(void *v)
 {
-	struct wd_softc *wd = v;
-	struct ata_xfer *xfer;
+	struct ata_xfer *xfer = v;
+	struct buf *bp = xfer->c_bio.bp;
+	struct wd_softc *wd = device_lookup_private(&wd_cd, WDUNIT(bp->b_dev));
 
 	ATADEBUG_PRINT(("wdrestart %s\n", device_xname(wd->sc_dev)),
 	    DEBUG_XFERS);
 
-	/*
-	 * Resend all failed xfers out immediatelly regardless of original
-	 * schedule, so that we error out reasonably fast in case of massive
-	 * permanent errors.
-	 */
 	mutex_enter(&wd->sc_lock);
-	while (!STAILQ_EMPTY(&wd->xfer_restart)) {
-		xfer = STAILQ_FIRST(&wd->xfer_restart);
-		STAILQ_REMOVE_HEAD(&wd->xfer_restart, c_restartchain);
-
-		wdstart1(v, xfer->c_bio.bp, xfer);
-	}
+	wdstart1(wd, bp, xfer);
 	mutex_exit(&wd->sc_lock);
 }
 

Index: src/sys/dev/ata/wdvar.h
diff -u src/sys/dev/ata/wdvar.h:1.43.4.6 src/sys/dev/ata/wdvar.h:1.43.4.7
--- src/sys/dev/ata/wdvar.h:1.43.4.6	Fri Jun 23 20:40:51 2017
+++ src/sys/dev/ata/wdvar.h	Wed Jul 19 19:39:28 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: wdvar.h,v 1.43.4.6 2017/06/23 20:40:51 jdolecek Exp $	*/
+/*	$NetBSD: wdvar.h,v 1.43.4.7 2017/07/19 19:39:28 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.
@@ -38,7 +38,6 @@ struct wd_softc {
 	device_t sc_dev;
 	struct disk sc_dk;
 	struct bufq_state *sc_q;
-	struct callout sc_restart_ch;
 	kmutex_t sc_lock;
 	int sc_quirks;			/* any quirks drive might have */
 
@@ -69,8 +68,6 @@ struct wd_softc {
 	u_int sc_bscount;
 #endif
 	krndsource_t	rnd_source;
-
-	STAILQ_HEAD(, ata_xfer) xfer_restart;
 };
 
 #endif /* _DEV_ATA_WDVAR_H_ */

Reply via email to