Module Name:    src
Committed By:   jdolecek
Date:           Sat Apr  4 21:36:15 UTC 2020

Modified Files:
        src/sys/dev/ata: ata.c ata_wdc.c atavar.h
        src/sys/dev/ic: mvsata.c wdc.c
        src/sys/dev/scsipi: atapi_wdc.c

Log Message:
fix deadlock in wdcwait() when xfer timeout happens while the atabus
thread sleeps in wdcwait() - check current lwp rather than relying
on global ATACH_TH_RUN channel flag

should fix the hang part of the problem reported in
http://mail-index.netbsd.org/netbsd-users/2020/03/12/msg024249.html

thanks to Paul Ripke for providing extensive debugging info


To generate a diff of this commit:
cvs rdiff -u -r1.153 -r1.154 src/sys/dev/ata/ata.c
cvs rdiff -u -r1.113 -r1.114 src/sys/dev/ata/ata_wdc.c
cvs rdiff -u -r1.103 -r1.104 src/sys/dev/ata/atavar.h
cvs rdiff -u -r1.54 -r1.55 src/sys/dev/ic/mvsata.c
cvs rdiff -u -r1.297 -r1.298 src/sys/dev/ic/wdc.c
cvs rdiff -u -r1.136 -r1.137 src/sys/dev/scsipi/atapi_wdc.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/ata/ata.c
diff -u src/sys/dev/ata/ata.c:1.153 src/sys/dev/ata/ata.c:1.154
--- src/sys/dev/ata/ata.c:1.153	Mon Oct 21 18:58:57 2019
+++ src/sys/dev/ata/ata.c	Sat Apr  4 21:36:15 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: ata.c,v 1.153 2019/10/21 18:58:57 christos Exp $	*/
+/*	$NetBSD: ata.c,v 1.154 2020/04/04 21:36:15 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ata.c,v 1.153 2019/10/21 18:58:57 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ata.c,v 1.154 2020/04/04 21:36:15 jdolecek Exp $");
 
 #include "opt_ata.h"
 
@@ -44,6 +44,7 @@ __KERNEL_RCSID(0, "$NetBSD: ata.c,v 1.15
 #include <sys/bus.h>
 #include <sys/once.h>
 #include <sys/bitops.h>
+#include <sys/cpu.h>
 
 #define ATABUS_PRIVATE
 
@@ -229,9 +230,6 @@ atabusconfig(struct atabus_softc *atabus
 	int i, error;
 
 	/* we are in the atabus's thread context */
-	ata_channel_lock(chp);
-	chp->ch_flags |= ATACH_TH_RUN;
-	ata_channel_unlock(chp);
 
 	/*
 	 * Probe for the drives attached to controller, unless a PMP
@@ -247,11 +245,6 @@ atabusconfig(struct atabus_softc *atabus
 		    DEBUG_PROBE);
 	}
 
-	/* next operations will occurs in a separate thread */
-	ata_channel_lock(chp);
-	chp->ch_flags &= ~ATACH_TH_RUN;
-	ata_channel_unlock(chp);
-
 	/* Make sure the devices probe in atabus order to avoid jitter. */
 	mutex_enter(&atabus_qlock);
 	for (;;) {
@@ -264,6 +257,8 @@ atabusconfig(struct atabus_softc *atabus
 
 	ata_channel_lock(chp);
 
+	KASSERT(ata_is_thread_run(chp));
+
 	/* If no drives, abort here */
 	if (chp->ch_drive == NULL)
 		goto out;
@@ -444,7 +439,7 @@ atabus_thread(void *arg)
 	int i, rv;
 
 	ata_channel_lock(chp);
-	chp->ch_flags |= ATACH_TH_RUN;
+	KASSERT(ata_is_thread_run(chp));
 
 	/*
 	 * Probe the drives.  Reset type to indicate to controllers
@@ -466,9 +461,7 @@ atabus_thread(void *arg)
 		if ((chp->ch_flags & (ATACH_TH_RESET | ATACH_TH_DRIVE_RESET
 		    | ATACH_TH_RECOVERY | ATACH_SHUTDOWN)) == 0 &&
 		    (chq->queue_active == 0 || chq->queue_freeze == 0)) {
-			chp->ch_flags &= ~ATACH_TH_RUN;
 			cv_wait(&chp->ch_thr_idle, &chp->ch_lock);
-			chp->ch_flags |= ATACH_TH_RUN;
 		}
 		if (chp->ch_flags & ATACH_SHUTDOWN) {
 			break;
@@ -547,6 +540,14 @@ atabus_thread(void *arg)
 	kthread_exit(0);
 }
 
+bool
+ata_is_thread_run(struct ata_channel *chp)
+{
+	KASSERT(mutex_owned(&chp->ch_lock));
+
+	return (chp->ch_thread == curlwp && !cpu_intr_p());
+}
+
 static void
 ata_thread_wake_locked(struct ata_channel *chp)
 {
@@ -1896,7 +1897,7 @@ ata_probe_caps(struct ata_drive_datas *d
 			 */
 			if (atac->atac_set_modes)
 				/*
-				 * It's OK to pool here, it's fast enough
+				 * It's OK to poll here, it's fast enough
 				 * to not bother waiting for interrupt
 				 */
 				if (ata_set_mode(drvp, 0x08 | (i + 3),

Index: src/sys/dev/ata/ata_wdc.c
diff -u src/sys/dev/ata/ata_wdc.c:1.113 src/sys/dev/ata/ata_wdc.c:1.114
--- src/sys/dev/ata/ata_wdc.c:1.113	Mon Nov 12 18:51:01 2018
+++ src/sys/dev/ata/ata_wdc.c	Sat Apr  4 21:36:15 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: ata_wdc.c,v 1.113 2018/11/12 18:51:01 jdolecek Exp $	*/
+/*	$NetBSD: ata_wdc.c,v 1.114 2020/04/04 21:36:15 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1998, 2001, 2003 Manuel Bouyer.
@@ -54,7 +54,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ata_wdc.c,v 1.113 2018/11/12 18:51:01 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ata_wdc.c,v 1.114 2020/04/04 21:36:15 jdolecek Exp $");
 
 #include "opt_ata.h"
 #include "opt_wdc.h"
@@ -206,10 +206,9 @@ wdc_ata_bio_start(struct ata_channel *ch
 		 * that we never get to this point if that's the case.
 		 */
 		/* If it's not a polled command, we need the kernel thread */
-		if ((xfer->c_flags & C_POLL) == 0 &&
-		    (chp->ch_flags & ATACH_TH_RUN) == 0) {
+		if ((xfer->c_flags & C_POLL) == 0 && !ata_is_thread_run(chp))
 			return ATASTART_TH;
-		}
+
 		/*
 		 * disable interrupts, all commands here should be quick
 		 * enough to be able to poll, and we don't go here that often

Index: src/sys/dev/ata/atavar.h
diff -u src/sys/dev/ata/atavar.h:1.103 src/sys/dev/ata/atavar.h:1.104
--- src/sys/dev/ata/atavar.h:1.103	Fri Apr  5 21:31:44 2019
+++ src/sys/dev/ata/atavar.h	Sat Apr  4 21:36:15 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: atavar.h,v 1.103 2019/04/05 21:31:44 bouyer Exp $	*/
+/*	$NetBSD: atavar.h,v 1.104 2020/04/04 21:36:15 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.
@@ -406,7 +406,6 @@ struct ata_channel {
 #define ATACH_DMA_WAIT 0x20	/* controller is waiting for DMA */
 #define ATACH_PIOBM_WAIT 0x40	/* controller is waiting for busmastering PIO */
 #define	ATACH_DISABLED 0x80	/* channel is disabled */
-#define ATACH_TH_RUN   0x100	/* the kernel thread is working */
 #define ATACH_TH_RESET 0x200	/* someone ask the thread to reset */
 #define ATACH_TH_RESCAN 0x400	/* rescan requested */
 #define ATACH_NCQ	0x800	/* channel executing NCQ commands */
@@ -549,6 +548,7 @@ bool	ata_timo_xfer_check(struct ata_xfer
 void	ata_kill_pending(struct ata_drive_datas *);
 void	ata_kill_active(struct ata_channel *, int, int);
 void	ata_thread_run(struct ata_channel *, int, int, int);
+bool	ata_is_thread_run(struct ata_channel *);
 void	ata_channel_freeze(struct ata_channel *);
 void	ata_channel_thaw_locked(struct ata_channel *);
 void	ata_channel_lock(struct ata_channel *);

Index: src/sys/dev/ic/mvsata.c
diff -u src/sys/dev/ic/mvsata.c:1.54 src/sys/dev/ic/mvsata.c:1.55
--- src/sys/dev/ic/mvsata.c:1.54	Sun Mar 22 16:46:30 2020
+++ src/sys/dev/ic/mvsata.c	Sat Apr  4 21:36:15 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: mvsata.c,v 1.54 2020/03/22 16:46:30 macallan Exp $	*/
+/*	$NetBSD: mvsata.c,v 1.55 2020/04/04 21:36:15 jdolecek Exp $	*/
 /*
  * Copyright (c) 2008 KIYOHARA Takashi
  * All rights reserved.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: mvsata.c,v 1.54 2020/03/22 16:46:30 macallan Exp $");
+__KERNEL_RCSID(0, "$NetBSD: mvsata.c,v 1.55 2020/04/04 21:36:15 jdolecek Exp $");
 
 #include "opt_mvsata.h"
 
@@ -1165,10 +1165,10 @@ do_pio:
 			 * If it's not a polled command, we need the kernel
 			 * thread
 			 */
-			if ((xfer->c_flags & C_POLL) == 0 &&
-			    (chp->ch_flags & ATACH_TH_RUN) == 0) {
+			if ((xfer->c_flags & C_POLL) == 0
+			    && !ata_is_thread_run(chp))
 				return ATASTART_TH;
-			}
+
 			if (mvsata_bio_ready(mvport, ata_bio, xfer->c_drive,
 			    (xfer->c_flags & C_POLL) ? AT_POLL : 0) != 0) {
 				return ATASTART_ABORT;
@@ -2052,10 +2052,10 @@ mvsata_atapi_start(struct ata_channel *c
 	/* Do control operations specially. */
 	if (__predict_false(drvp->state < READY)) {
 		/* If it's not a polled command, we need the kernel thread */
-		if ((sc_xfer->xs_control & XS_CTL_POLL) == 0 &&
-		    (chp->ch_flags & ATACH_TH_RUN) == 0) {
+		if ((sc_xfer->xs_control & XS_CTL_POLL) == 0
+		    && !ata_is_thread_run(chp))
 			return ATASTART_TH;
-		}
+
 		/*
 		 * disable interrupts, all commands here should be quick
 		 * enough to be able to poll, and we don't go here that often

Index: src/sys/dev/ic/wdc.c
diff -u src/sys/dev/ic/wdc.c:1.297 src/sys/dev/ic/wdc.c:1.298
--- src/sys/dev/ic/wdc.c:1.297	Mon Feb 10 20:11:48 2020
+++ src/sys/dev/ic/wdc.c	Sat Apr  4 21:36:15 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: wdc.c,v 1.297 2020/02/10 20:11:48 jdolecek Exp $ */
+/*	$NetBSD: wdc.c,v 1.298 2020/04/04 21:36:15 jdolecek Exp $ */
 
 /*
  * Copyright (c) 1998, 2001, 2003 Manuel Bouyer.  All rights reserved.
@@ -58,7 +58,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: wdc.c,v 1.297 2020/02/10 20:11:48 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: wdc.c,v 1.298 2020/04/04 21:36:15 jdolecek Exp $");
 
 #include "opt_ata.h"
 #include "opt_wdc.h"
@@ -1278,8 +1278,7 @@ wdcwait(struct ata_channel *chp, int mas
 	else {
 		error = __wdcwait(chp, mask, bits, WDCDELAY_POLL, tfd);
 		if (error != 0) {
-			if ((chp->ch_flags & ATACH_TH_RUN) ||
-			    (flags & AT_WAIT)) {
+			if (ata_is_thread_run(chp) || (flags & AT_WAIT)) {
 				/*
 				 * we're running in the channel thread
 				 * or some userland thread context

Index: src/sys/dev/scsipi/atapi_wdc.c
diff -u src/sys/dev/scsipi/atapi_wdc.c:1.136 src/sys/dev/scsipi/atapi_wdc.c:1.137
--- src/sys/dev/scsipi/atapi_wdc.c:1.136	Wed Feb 19 16:04:39 2020
+++ src/sys/dev/scsipi/atapi_wdc.c	Sat Apr  4 21:36:15 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: atapi_wdc.c,v 1.136 2020/02/19 16:04:39 riastradh Exp $	*/
+/*	$NetBSD: atapi_wdc.c,v 1.137 2020/04/04 21:36:15 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: atapi_wdc.c,v 1.136 2020/02/19 16:04:39 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: atapi_wdc.c,v 1.137 2020/04/04 21:36:15 jdolecek Exp $");
 
 #ifndef ATADEBUG
 #define ATADEBUG
@@ -501,10 +501,9 @@ wdc_atapi_start(struct ata_channel *chp,
 	/* Do control operations specially. */
 	if (__predict_false(drvp->state < READY)) {
 		/* If it's not a polled command, we need the kernel thread */
-		if ((sc_xfer->xs_control & XS_CTL_POLL) == 0 &&
-		    (chp->ch_flags & ATACH_TH_RUN) == 0) {
+		if ((sc_xfer->xs_control & XS_CTL_POLL) == 0
+		    && !ata_is_thread_run(chp))
 			return ATASTART_TH;
-		}
 		/*
 		 * disable interrupts, all commands here should be quick
 		 * enough to be able to poll, and we don't go here that often

Reply via email to