Module Name:    src
Committed By:   jdolecek
Date:           Tue Aug  1 22:02:32 UTC 2017

Modified Files:
        src/sys/dev/ic [jdolecek-ncq]: ahcisata_core.c ahcisatavar.h
            mvsatavar.h siisata.c siisatavar.h

Log Message:
fix logic bug in processing of finished commands - mask of active
commands can change during the loop as c_intr() callback can queue
new commands, so the interrupt routine should only mark as finished
those which were actually active before the loop started; otherwise
the code marked as finished commands which were just started, and
being executed by HBA, leading to all sorts of data corruption

while here mark the active mask volatile, as it is modified from
interrupt context

this fixes for good the random crashes, short reads, and fatal command
errors which I've been tracing down for past couple weeks

thanks to Jonathan (jakllsch@) for testing, and a script to easily
triggered the condition, and led to this bug being finally found and squashed


To generate a diff of this commit:
cvs rdiff -u -r1.57.6.24 -r1.57.6.25 src/sys/dev/ic/ahcisata_core.c
cvs rdiff -u -r1.17.6.2 -r1.17.6.3 src/sys/dev/ic/ahcisatavar.h
cvs rdiff -u -r1.2.48.2 -r1.2.48.3 src/sys/dev/ic/mvsatavar.h
cvs rdiff -u -r1.30.4.30 -r1.30.4.31 src/sys/dev/ic/siisata.c
cvs rdiff -u -r1.6.48.2 -r1.6.48.3 src/sys/dev/ic/siisatavar.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/ic/ahcisata_core.c
diff -u src/sys/dev/ic/ahcisata_core.c:1.57.6.24 src/sys/dev/ic/ahcisata_core.c:1.57.6.25
--- src/sys/dev/ic/ahcisata_core.c:1.57.6.24	Sat Jul 29 16:50:32 2017
+++ src/sys/dev/ic/ahcisata_core.c	Tue Aug  1 22:02:32 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: ahcisata_core.c,v 1.57.6.24 2017/07/29 16:50:32 jdolecek Exp $	*/
+/*	$NetBSD: ahcisata_core.c,v 1.57.6.25 2017/08/01 22:02:32 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ahcisata_core.c,v 1.57.6.24 2017/07/29 16:50:32 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ahcisata_core.c,v 1.57.6.25 2017/08/01 22:02:32 jdolecek Exp $");
 
 #include <sys/types.h>
 #include <sys/malloc.h>
@@ -559,7 +559,7 @@ ahci_intr(void *v)
 static void
 ahci_intr_port(struct ahci_softc *sc, struct ahci_channel *achp)
 {
-	uint32_t is, tfd, active;
+	uint32_t is, tfd, sact;
 	struct ata_channel *chp = &achp->ata_channel;
 	struct ata_xfer *xfer;
 	int slot;
@@ -578,12 +578,12 @@ ahci_intr_port(struct ahci_softc *sc, st
 
 	if ((chp->ch_flags & ATACH_NCQ) == 0) {
 		/* Non-NCQ operation */
-		active = AHCI_READ(sc, AHCI_P_CI(chp->ch_channel));
+		sact = AHCI_READ(sc, AHCI_P_CI(chp->ch_channel));
 		slot = (AHCI_READ(sc, AHCI_P_CMD(chp->ch_channel))
 			& AHCI_P_CMD_CCS_MASK) >> AHCI_P_CMD_CCS_SHIFT;
 	} else {
 		/* NCQ operation */
-		active = AHCI_READ(sc, AHCI_P_SACT(chp->ch_channel));
+		sact = AHCI_READ(sc, AHCI_P_SACT(chp->ch_channel));
 		slot = -1;
 	}
 
@@ -595,7 +595,7 @@ ahci_intr_port(struct ahci_softc *sc, st
 			tfd = AHCI_READ(sc, AHCI_P_TFD(chp->ch_channel));
 
 			aprint_error("%s port %d: active %x is 0x%x tfd 0x%x\n",
-			    AHCINAME(sc), chp->ch_channel, active, is, tfd);
+			    AHCINAME(sc), chp->ch_channel, sact, is, tfd);
 		} else {
 			/* mark an error, and set BSY */
 			tfd = (WDCE_ABRT << AHCI_P_TFD_ERR_SHIFT) |
@@ -630,8 +630,8 @@ ahci_intr_port(struct ahci_softc *sc, st
 		ata_channel_freeze(chp);
 
 	if (slot >= 0) {
-		if ((achp->ahcic_cmds_active & (1 << slot)) != 0 &&
-		    (active & (1 << slot)) == 0) {
+		if ((achp->ahcic_cmds_active & __BIT(slot)) != 0 &&
+		    (sact & __BIT(slot)) == 0) {
 			xfer = ata_queue_hwslot_to_xfer(chp, slot);
 			xfer->c_intr(chp, xfer, tfd);
 		}
@@ -641,10 +641,15 @@ ahci_intr_port(struct ahci_softc *sc, st
 		 * and any further D2H FISes are ignored until the error
 		 * condition is cleared. Hence if a command is inactive,
 		 * it means it actually already finished successfully.
+		 * Note: active slots can change as c_intr() callback
+		 * can activate another command(s), so must only process
+		 * commands active before we start processing.
 		 */
+		uint32_t aslots = achp->ahcic_cmds_active;
+
 		for (slot=0; slot < sc->sc_ncmds; slot++) {
-			if ((achp->ahcic_cmds_active & (1 << slot)) != 0 &&
-			    (active & (1 << slot)) == 0) {
+			if ((aslots & __BIT(slot)) != 0 &&
+			    (sact & __BIT(slot)) == 0) {
 				xfer = ata_queue_hwslot_to_xfer(chp, slot);
 				xfer->c_intr(chp, xfer, 0);
 			}

Index: src/sys/dev/ic/ahcisatavar.h
diff -u src/sys/dev/ic/ahcisatavar.h:1.17.6.2 src/sys/dev/ic/ahcisatavar.h:1.17.6.3
--- src/sys/dev/ic/ahcisatavar.h:1.17.6.2	Sat Jul 29 15:07:46 2017
+++ src/sys/dev/ic/ahcisatavar.h	Tue Aug  1 22:02:32 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: ahcisatavar.h,v 1.17.6.2 2017/07/29 15:07:46 jdolecek Exp $	*/
+/*	$NetBSD: ahcisatavar.h,v 1.17.6.3 2017/08/01 22:02:32 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -82,7 +82,7 @@ struct ahci_softc {
 		struct ahci_cmd_tbl *ahcic_cmd_tbl[AHCI_MAX_CMDS];
 		bus_addr_t ahcic_bus_cmd_tbl[AHCI_MAX_CMDS];
 		bus_dmamap_t ahcic_datad[AHCI_MAX_CMDS];
-		uint32_t  ahcic_cmds_active;	/* active commands */
+		volatile uint32_t  ahcic_cmds_active;	/* active commands */
 		uint32_t  ahcic_cmds_hold; 	/* held commands */
 		bool ahcic_recovering;
 	} sc_channels[AHCI_MAX_PORTS];

Index: src/sys/dev/ic/mvsatavar.h
diff -u src/sys/dev/ic/mvsatavar.h:1.2.48.2 src/sys/dev/ic/mvsatavar.h:1.2.48.3
--- src/sys/dev/ic/mvsatavar.h:1.2.48.2	Sat Jun 24 14:33:06 2017
+++ src/sys/dev/ic/mvsatavar.h	Tue Aug  1 22:02:32 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: mvsatavar.h,v 1.2.48.2 2017/06/24 14:33:06 jdolecek Exp $	*/
+/*	$NetBSD: mvsatavar.h,v 1.2.48.3 2017/08/01 22:02:32 jdolecek Exp $	*/
 /*
  * Copyright (c) 2008 KIYOHARA Takashi
  * All rights reserved.
@@ -73,7 +73,7 @@ struct mvsata_port {
 	enum mvsata_edmamode port_edmamode_negotiated;
 	enum mvsata_edmamode port_edmamode_curr;
 
-	uint32_t port_quetagidx;	/* Host Queue Tag valiable */
+	volatile uint32_t port_quetagidx;	/* Host Queue Tag valid */
 
 	int port_prev_erqqop;		/* previous Req Queue Out-Pointer */
 	bus_dma_tag_t port_dmat;

Index: src/sys/dev/ic/siisata.c
diff -u src/sys/dev/ic/siisata.c:1.30.4.30 src/sys/dev/ic/siisata.c:1.30.4.31
--- src/sys/dev/ic/siisata.c:1.30.4.30	Tue Aug  1 21:43:49 2017
+++ src/sys/dev/ic/siisata.c	Tue Aug  1 22:02:32 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: siisata.c,v 1.30.4.30 2017/08/01 21:43:49 jdolecek Exp $ */
+/* $NetBSD: siisata.c,v 1.30.4.31 2017/08/01 22:02:32 jdolecek Exp $ */
 
 /* from ahcisata_core.c */
 
@@ -79,7 +79,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: siisata.c,v 1.30.4.30 2017/08/01 21:43:49 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: siisata.c,v 1.30.4.31 2017/08/01 22:02:32 jdolecek Exp $");
 
 #include <sys/types.h>
 #include <sys/param.h>
@@ -560,9 +560,14 @@ siisata_intr_port(struct siisata_channel
 		 * and any further D2H FISes are ignored until the error
 		 * condition is cleared. Hence if a command is inactive,
 		 * it means it actually already finished successfully.
+		 * Note: active slots can change as c_intr() callback
+		 * can activate another command(s), so must only process
+		 * commands active before we start processing.
 		 */
+		uint32_t aslots = schp->sch_active_slots;
+
 		for (int slot=0; slot < SIISATA_MAX_SLOTS; slot++) {
-			if ((schp->sch_active_slots & __BIT(slot)) != 0 &&
+			if ((aslots & __BIT(slot)) != 0 &&
 			    (pss & PR_PXSS(slot)) == 0) {
 				xfer = ata_queue_hwslot_to_xfer(chp, slot);
 				xfer->c_intr(chp, xfer, 0);

Index: src/sys/dev/ic/siisatavar.h
diff -u src/sys/dev/ic/siisatavar.h:1.6.48.2 src/sys/dev/ic/siisatavar.h:1.6.48.3
--- src/sys/dev/ic/siisatavar.h:1.6.48.2	Wed Jul 19 20:03:29 2017
+++ src/sys/dev/ic/siisatavar.h	Tue Aug  1 22:02:32 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: siisatavar.h,v 1.6.48.2 2017/07/19 20:03:29 jdolecek Exp $ */
+/* $NetBSD: siisatavar.h,v 1.6.48.3 2017/08/01 22:02:32 jdolecek Exp $ */
 
 /* from ahcisatavar.h */
 
@@ -100,7 +100,7 @@ struct siisata_softc {
 
 		bus_dmamap_t sch_datad[SIISATA_MAX_SLOTS];
 
-		uint32_t sch_active_slots;
+		volatile uint32_t sch_active_slots;
 		uint32_t sch_hold_slots;
 		bool sch_recovering;
 	} sc_channels[SIISATA_MAX_PORTS];

Reply via email to