Module Name: src Committed By: jdolecek Date: Tue Aug 1 22:02:32 UTC 2017
Modified Files: src/sys/dev/ic [jdolecek-ncq]: ahcisata_core.c ahcisatavar.h mvsatavar.h siisata.c siisatavar.h Log Message: fix logic bug in processing of finished commands - mask of active commands can change during the loop as c_intr() callback can queue new commands, so the interrupt routine should only mark as finished those which were actually active before the loop started; otherwise the code marked as finished commands which were just started, and being executed by HBA, leading to all sorts of data corruption while here mark the active mask volatile, as it is modified from interrupt context this fixes for good the random crashes, short reads, and fatal command errors which I've been tracing down for past couple weeks thanks to Jonathan (jakllsch@) for testing, and a script to easily triggered the condition, and led to this bug being finally found and squashed To generate a diff of this commit: cvs rdiff -u -r1.57.6.24 -r1.57.6.25 src/sys/dev/ic/ahcisata_core.c cvs rdiff -u -r1.17.6.2 -r1.17.6.3 src/sys/dev/ic/ahcisatavar.h cvs rdiff -u -r1.2.48.2 -r1.2.48.3 src/sys/dev/ic/mvsatavar.h cvs rdiff -u -r1.30.4.30 -r1.30.4.31 src/sys/dev/ic/siisata.c cvs rdiff -u -r1.6.48.2 -r1.6.48.3 src/sys/dev/ic/siisatavar.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/ic/ahcisata_core.c diff -u src/sys/dev/ic/ahcisata_core.c:1.57.6.24 src/sys/dev/ic/ahcisata_core.c:1.57.6.25 --- src/sys/dev/ic/ahcisata_core.c:1.57.6.24 Sat Jul 29 16:50:32 2017 +++ src/sys/dev/ic/ahcisata_core.c Tue Aug 1 22:02:32 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: ahcisata_core.c,v 1.57.6.24 2017/07/29 16:50:32 jdolecek Exp $ */ +/* $NetBSD: ahcisata_core.c,v 1.57.6.25 2017/08/01 22:02:32 jdolecek Exp $ */ /* * Copyright (c) 2006 Manuel Bouyer. @@ -26,7 +26,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ahcisata_core.c,v 1.57.6.24 2017/07/29 16:50:32 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ahcisata_core.c,v 1.57.6.25 2017/08/01 22:02:32 jdolecek Exp $"); #include <sys/types.h> #include <sys/malloc.h> @@ -559,7 +559,7 @@ ahci_intr(void *v) static void ahci_intr_port(struct ahci_softc *sc, struct ahci_channel *achp) { - uint32_t is, tfd, active; + uint32_t is, tfd, sact; struct ata_channel *chp = &achp->ata_channel; struct ata_xfer *xfer; int slot; @@ -578,12 +578,12 @@ ahci_intr_port(struct ahci_softc *sc, st if ((chp->ch_flags & ATACH_NCQ) == 0) { /* Non-NCQ operation */ - active = AHCI_READ(sc, AHCI_P_CI(chp->ch_channel)); + sact = AHCI_READ(sc, AHCI_P_CI(chp->ch_channel)); slot = (AHCI_READ(sc, AHCI_P_CMD(chp->ch_channel)) & AHCI_P_CMD_CCS_MASK) >> AHCI_P_CMD_CCS_SHIFT; } else { /* NCQ operation */ - active = AHCI_READ(sc, AHCI_P_SACT(chp->ch_channel)); + sact = AHCI_READ(sc, AHCI_P_SACT(chp->ch_channel)); slot = -1; } @@ -595,7 +595,7 @@ ahci_intr_port(struct ahci_softc *sc, st tfd = AHCI_READ(sc, AHCI_P_TFD(chp->ch_channel)); aprint_error("%s port %d: active %x is 0x%x tfd 0x%x\n", - AHCINAME(sc), chp->ch_channel, active, is, tfd); + AHCINAME(sc), chp->ch_channel, sact, is, tfd); } else { /* mark an error, and set BSY */ tfd = (WDCE_ABRT << AHCI_P_TFD_ERR_SHIFT) | @@ -630,8 +630,8 @@ ahci_intr_port(struct ahci_softc *sc, st ata_channel_freeze(chp); if (slot >= 0) { - if ((achp->ahcic_cmds_active & (1 << slot)) != 0 && - (active & (1 << slot)) == 0) { + if ((achp->ahcic_cmds_active & __BIT(slot)) != 0 && + (sact & __BIT(slot)) == 0) { xfer = ata_queue_hwslot_to_xfer(chp, slot); xfer->c_intr(chp, xfer, tfd); } @@ -641,10 +641,15 @@ ahci_intr_port(struct ahci_softc *sc, st * and any further D2H FISes are ignored until the error * condition is cleared. Hence if a command is inactive, * it means it actually already finished successfully. + * Note: active slots can change as c_intr() callback + * can activate another command(s), so must only process + * commands active before we start processing. */ + uint32_t aslots = achp->ahcic_cmds_active; + for (slot=0; slot < sc->sc_ncmds; slot++) { - if ((achp->ahcic_cmds_active & (1 << slot)) != 0 && - (active & (1 << slot)) == 0) { + if ((aslots & __BIT(slot)) != 0 && + (sact & __BIT(slot)) == 0) { xfer = ata_queue_hwslot_to_xfer(chp, slot); xfer->c_intr(chp, xfer, 0); } Index: src/sys/dev/ic/ahcisatavar.h diff -u src/sys/dev/ic/ahcisatavar.h:1.17.6.2 src/sys/dev/ic/ahcisatavar.h:1.17.6.3 --- src/sys/dev/ic/ahcisatavar.h:1.17.6.2 Sat Jul 29 15:07:46 2017 +++ src/sys/dev/ic/ahcisatavar.h Tue Aug 1 22:02:32 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: ahcisatavar.h,v 1.17.6.2 2017/07/29 15:07:46 jdolecek Exp $ */ +/* $NetBSD: ahcisatavar.h,v 1.17.6.3 2017/08/01 22:02:32 jdolecek Exp $ */ /* * Copyright (c) 2006 Manuel Bouyer. @@ -82,7 +82,7 @@ struct ahci_softc { struct ahci_cmd_tbl *ahcic_cmd_tbl[AHCI_MAX_CMDS]; bus_addr_t ahcic_bus_cmd_tbl[AHCI_MAX_CMDS]; bus_dmamap_t ahcic_datad[AHCI_MAX_CMDS]; - uint32_t ahcic_cmds_active; /* active commands */ + volatile uint32_t ahcic_cmds_active; /* active commands */ uint32_t ahcic_cmds_hold; /* held commands */ bool ahcic_recovering; } sc_channels[AHCI_MAX_PORTS]; Index: src/sys/dev/ic/mvsatavar.h diff -u src/sys/dev/ic/mvsatavar.h:1.2.48.2 src/sys/dev/ic/mvsatavar.h:1.2.48.3 --- src/sys/dev/ic/mvsatavar.h:1.2.48.2 Sat Jun 24 14:33:06 2017 +++ src/sys/dev/ic/mvsatavar.h Tue Aug 1 22:02:32 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: mvsatavar.h,v 1.2.48.2 2017/06/24 14:33:06 jdolecek Exp $ */ +/* $NetBSD: mvsatavar.h,v 1.2.48.3 2017/08/01 22:02:32 jdolecek Exp $ */ /* * Copyright (c) 2008 KIYOHARA Takashi * All rights reserved. @@ -73,7 +73,7 @@ struct mvsata_port { enum mvsata_edmamode port_edmamode_negotiated; enum mvsata_edmamode port_edmamode_curr; - uint32_t port_quetagidx; /* Host Queue Tag valiable */ + volatile uint32_t port_quetagidx; /* Host Queue Tag valid */ int port_prev_erqqop; /* previous Req Queue Out-Pointer */ bus_dma_tag_t port_dmat; Index: src/sys/dev/ic/siisata.c diff -u src/sys/dev/ic/siisata.c:1.30.4.30 src/sys/dev/ic/siisata.c:1.30.4.31 --- src/sys/dev/ic/siisata.c:1.30.4.30 Tue Aug 1 21:43:49 2017 +++ src/sys/dev/ic/siisata.c Tue Aug 1 22:02:32 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: siisata.c,v 1.30.4.30 2017/08/01 21:43:49 jdolecek Exp $ */ +/* $NetBSD: siisata.c,v 1.30.4.31 2017/08/01 22:02:32 jdolecek Exp $ */ /* from ahcisata_core.c */ @@ -79,7 +79,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: siisata.c,v 1.30.4.30 2017/08/01 21:43:49 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: siisata.c,v 1.30.4.31 2017/08/01 22:02:32 jdolecek Exp $"); #include <sys/types.h> #include <sys/param.h> @@ -560,9 +560,14 @@ siisata_intr_port(struct siisata_channel * and any further D2H FISes are ignored until the error * condition is cleared. Hence if a command is inactive, * it means it actually already finished successfully. + * Note: active slots can change as c_intr() callback + * can activate another command(s), so must only process + * commands active before we start processing. */ + uint32_t aslots = schp->sch_active_slots; + for (int slot=0; slot < SIISATA_MAX_SLOTS; slot++) { - if ((schp->sch_active_slots & __BIT(slot)) != 0 && + if ((aslots & __BIT(slot)) != 0 && (pss & PR_PXSS(slot)) == 0) { xfer = ata_queue_hwslot_to_xfer(chp, slot); xfer->c_intr(chp, xfer, 0); Index: src/sys/dev/ic/siisatavar.h diff -u src/sys/dev/ic/siisatavar.h:1.6.48.2 src/sys/dev/ic/siisatavar.h:1.6.48.3 --- src/sys/dev/ic/siisatavar.h:1.6.48.2 Wed Jul 19 20:03:29 2017 +++ src/sys/dev/ic/siisatavar.h Tue Aug 1 22:02:32 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: siisatavar.h,v 1.6.48.2 2017/07/19 20:03:29 jdolecek Exp $ */ +/* $NetBSD: siisatavar.h,v 1.6.48.3 2017/08/01 22:02:32 jdolecek Exp $ */ /* from ahcisatavar.h */ @@ -100,7 +100,7 @@ struct siisata_softc { bus_dmamap_t sch_datad[SIISATA_MAX_SLOTS]; - uint32_t sch_active_slots; + volatile uint32_t sch_active_slots; uint32_t sch_hold_slots; bool sch_recovering; } sc_channels[SIISATA_MAX_PORTS];