> > Throw me a few more hints, please. I already knew about error handling
> > being changed; what should I use for sleep_on(), and will the driver still
> > be able to release the ST-DMA lock on request done if queue management is
> > done by the midlevel?
>
> On second thought - the sleep_on we can perhaps do away with by cutting
> out the whole lock wait scheme (lock contention between SCSI, IDE and
> floppy are the least of my concerns). Lock release is currently done by
> the low level driver after calls to scsi_done() which seems fine. Lock
> acquisition happens in queue_command() which may sleep, causing other
> commands to time out. The error handling code suffers from the same
> problem - the SCSI timer needs to be stopped while queueing commands and
> doing error handling. Is there a way to guarantee this?
I've looked at the SCSI situation in more detail, and updated the Atari
SCSI code to cope with the changes in 2.6 (scsi_error expect different
return codes; queuecmnd called from softirq and cannot sleep). I get no
more lockups, but writing to a ZIP drive corrupts data on the drive. May
just be a broken ZIP drive, who knows. I need to try with a real SCSI
disk...
Christian, please apply this on top of my other 2.6.18 patch.
--- linux-2.6.18-m68k-ms/drivers/scsi/atari_scsi.c 2006-10-19
14:23:32.000000000 +0200
+++ linux-2.6.18-m68k/drivers/scsi/atari_scsi.c 2006-12-31 12:28:08.895670559
+0100
@@ -67,12 +67,39 @@
#include <linux/module.h>
-#define NDEBUG (0)
+#define NDEBUG_ARBITRATION 0x1
+#define NDEBUG_AUTOSENSE 0x2
+#define NDEBUG_DMA 0x4
+#define NDEBUG_HANDSHAKE 0x8
+#define NDEBUG_INFORMATION 0x10
+#define NDEBUG_INIT 0x20
+#define NDEBUG_INTR 0x40
+#define NDEBUG_LINKED 0x80
+#define NDEBUG_MAIN 0x100
+#define NDEBUG_NO_DATAOUT 0x200
+#define NDEBUG_NO_WRITE 0x400
+#define NDEBUG_PIO 0x800
+#define NDEBUG_PSEUDO_DMA 0x1000
+#define NDEBUG_QUEUES 0x2000
+#define NDEBUG_RESELECTION 0x4000
+#define NDEBUG_SELECTION 0x8000
+#define NDEBUG_USLEEP 0x10000
+#define NDEBUG_LAST_BYTE_SENT 0x20000
+#define NDEBUG_RESTART_SELECT 0x40000
+#define NDEBUG_EXTENDED 0x80000
+#define NDEBUG_C400_PREAD 0x100000
+#define NDEBUG_C400_PWRITE 0x200000
+#define NDEBUG_LISTS 0x400000
#define NDEBUG_ABORT 0x800000
#define NDEBUG_TAGS 0x1000000
#define NDEBUG_MERGING 0x2000000
+#define NDEBUG_ANY 0xFFFFFFFFUL
+
+#define NDEBUG (0)
+//#define NDEBUG (NDEBUG_MAIN)
+
#define AUTOSENSE
/* For the Atari version, use only polled IO or REAL_DMA */
#define REAL_DMA
@@ -198,7 +225,7 @@
static irqreturn_t scsi_falcon_intr( int irq, void *dummy, struct pt_regs *fp);
static void falcon_release_lock_if_possible( struct NCR5380_hostdata *
hostdata );
-static void falcon_get_lock( void );
+static int falcon_get_lock( void );
#ifdef CONFIG_ATARI_SCSI_RESET_BOOT
static void atari_scsi_reset_boot( void );
#endif
@@ -506,6 +533,15 @@
* again (but others waiting longer more probably will win).
*/
+/* MSch 20061228: in 2.6, the fairness wait appears to open a race with
+ * the IDE driver's use of the lock, resulting in scheduling_in_interrupt
+ * level BUG() messages.
+ * The low level queuecmd function now appears to be called from soft
+ * interrupt context (block queue task??) and cannot sleep on the lock
+ * anymore once IDE has stolen the lock.
+ * Can we return 'please retry later' to the block queue task or mid level??
+ * MSch 20061229: the race persists regardless ... leave it off for now.
+ */
static void
falcon_release_lock_if_possible( struct NCR5380_hostdata * hostdata )
{
@@ -529,7 +565,9 @@
}
falcon_got_lock = 0;
stdma_release();
+#if defined(FALCON_FAIRNESS_WAIT)
wake_up( &falcon_fairness_wait );
+#endif
}
local_irq_restore(flags);
@@ -550,20 +588,37 @@
* Complicated, complicated.... Sigh...
*/
-static void falcon_get_lock( void )
+/* MSch 20061229: atari_queue_command gets called from softirq context quite
+ * heavily in the 2.6 kernel series. Since atari_queue_command might need to
+ * sleep in order to grab the ST-DMA lock, I have modified falcon_get_lock to
+ * immediately return with error status if called in softirq context with the
+ * lock not currently held by the SCSI driver, and the ST-DMA locked by some
+ * other driver. atari_queue_command then returns SCSI_MLQUEUE_HOST_BUSY and
+ * prevents further commands from issueing.
+ */
+
+static int falcon_get_lock( void )
{
unsigned long flags;
- if (IS_A_TT()) return;
+ if (IS_A_TT()) return 0;
local_irq_save(flags);
+#if defined (FALCON_FAIRNESS_WAIT)
while( !in_irq() && falcon_got_lock && stdma_others_waiting() )
sleep_on( &falcon_fairness_wait );
+#endif
while (!falcon_got_lock) {
if (in_irq())
panic( "Falcon SCSI hasn't ST-DMA lock in interrupt" );
+ /* we may not sleep in soft interrupts neither, so bail out */
+ if (in_softirq() && stdma_islocked()) {
+ printk(KERN_INFO "Falcon SCSI does not hold ST-DMA lock
in softirq!\n" );
+ local_irq_restore(flags);
+ return 1;
+ }
if (!falcon_trying_lock) {
falcon_trying_lock = 1;
stdma_lock(scsi_falcon_intr, NULL);
@@ -579,6 +634,8 @@
local_irq_restore(flags);
if (!falcon_got_lock)
panic("Falcon SCSI: someone stole the lock :-(\n");
+
+ return 0;
}
@@ -824,6 +881,8 @@
struct NCR5380_hostdata *hostdata =
(struct NCR5380_hostdata *)cmd->device->host->hostdata;
+ printk( "scsi%d: resetting the SCSI bus!\n",
(cmd)->device->host->host_no);
+
/* For doing the reset, SCSI interrupts must be disabled first,
* since the 5380 raises its IRQ line while _RST is active and we
* can't disable interrupts completely, since we need the timer.
@@ -853,8 +912,10 @@
else {
atari_turnon_irq( IRQ_MFP_FSCSI );
}
- if ((rv & SCSI_RESET_ACTION) == SCSI_RESET_SUCCESS)
+ if (rv == SUCCESS) {
falcon_release_lock_if_possible(hostdata);
+ }
+ printk( "scsi%d: bus reset done!\n", (cmd)->device->host->host_no);
return( rv );
}
--- linux-2.6.18-m68k-ms/drivers/scsi/atari_NCR5380.c 2006-10-19
14:23:32.000000000 +0200
+++ linux-2.6.18-m68k/drivers/scsi/atari_NCR5380.c 2006-12-31
12:34:13.013137465 +0100
@@ -474,7 +474,8 @@
virt_to_phys(page_address(cmd->SCp.buffer[1].page)+
cmd->SCp.buffer[1].offset) == endaddr; ) {
MER_PRINTK("VTOP(%p) == %08lx -> merging\n",
- cmd->SCp.buffer[1].address, endaddr);
+
page_address(cmd->SCp.buffer[1].page)+cmd->SCp.buffer[1].offset,
+ endaddr);
#if (NDEBUG & NDEBUG_MERGING)
++cnt;
#endif
@@ -987,17 +988,6 @@
#endif
/*
- * We use the host_scribble field as a pointer to the next command
- * in a queue
- */
-
- NEXT(cmd) = NULL;
- cmd->scsi_done = done;
-
- cmd->result = 0;
-
-
- /*
* Insert the cmd into the issue queue. Note that REQUEST SENSE
* commands are added to the head of the queue since any command will
* clear the contingent allegiance condition that exists and the
@@ -1018,10 +1008,31 @@
* alter queues and touch the lock.
*/
if (!IS_A_TT()) {
+ int rv;
+ /* MSch: since we get called from softirq context here, and cannot
+ * sleep safely, the return status of falcon_get_lock is now used to
+ * figure out if we could successfully lock, or need to bail out.
+ * Signal the midlevel we're unable to queue the command in this case.
+ */
oldto = atari_scsi_update_timeout(cmd, 0);
- falcon_get_lock();
+ rv = falcon_get_lock();
atari_scsi_update_timeout(cmd, oldto);
+ if (rv) {
+ local_irq_restore(flags);
+ return SCSI_MLQUEUE_HOST_BUSY;
+ }
}
+
+ /*
+ * We use the host_scribble field as a pointer to the next command
+ * in a queue
+ */
+
+ NEXT(cmd) = NULL;
+ cmd->scsi_done = done;
+
+ cmd->result = 0;
+
if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
LIST(cmd, hostdata->issue_queue);
NEXT(cmd) = hostdata->issue_queue;
@@ -1045,10 +1056,13 @@
* If we're not in an interrupt, we can call NCR5380_main()
* unconditionally, because it cannot be already running.
*/
- if (in_interrupt() || ((flags >> 8) & 7) >= 6)
+
+ /* MSch: in 2.6.19, we need to unconditionally use the task queue
+ * instead of directly starting main. Yet another side effect of
+ * the softirq business, I bet. */
+
queue_main();
- else
- NCR5380_main(NULL);
+
return 0;
}
@@ -2670,7 +2684,7 @@
* host byte of the result field to, if zero DID_ABORTED is
* used.
*
- * Returns : 0 - success, -1 on failure.
+ * Returns : SUCCESS - success, FAILED on failure.
*
* XXX - there is no way to abort the command that is currently
* connected, you have to wait for it to complete. If this is
@@ -2740,11 +2754,12 @@
local_irq_restore(flags);
cmd->scsi_done(cmd);
falcon_release_lock_if_possible( hostdata );
- return SCSI_ABORT_SUCCESS;
+ return SUCCESS;
} else {
+ /* Why is this not restoring IRQs?? */
/* local_irq_restore(flags); */
printk("scsi%d: abort of connected command failed!\n", HOSTNO);
- return SCSI_ABORT_ERROR;
+ return FAILED;
}
}
#endif
@@ -2768,7 +2783,7 @@
* yet... */
tmp->scsi_done(tmp);
falcon_release_lock_if_possible( hostdata );
- return SCSI_ABORT_SUCCESS;
+ return SUCCESS;
}
/*
@@ -2785,7 +2800,7 @@
if (hostdata->connected) {
local_irq_restore(flags);
ABRT_PRINTK("scsi%d: abort failed, command connected.\n", HOSTNO);
- return SCSI_ABORT_SNOOZE;
+ return FAILED;
}
/*
@@ -2820,7 +2835,7 @@
ABRT_PRINTK("scsi%d: aborting disconnected command.\n", HOSTNO);
if (NCR5380_select (instance, cmd, (int) cmd->tag))
- return SCSI_ABORT_BUSY;
+ return FAILED;
ABRT_PRINTK("scsi%d: nexus reestablished.\n", HOSTNO);
@@ -2847,7 +2862,7 @@
local_irq_restore(flags);
tmp->scsi_done(tmp);
falcon_release_lock_if_possible( hostdata );
- return SCSI_ABORT_SUCCESS;
+ return SUCCESS;
}
}
@@ -2871,10 +2886,9 @@
*/
falcon_release_lock_if_possible( hostdata );
- return SCSI_ABORT_NOT_RUNNING;
+ return SUCCESS;
}
-
/*
* Function : int NCR5380_reset (Scsi_Cmnd *cmd)
*
@@ -2880,7 +2894,7 @@
*
* Purpose : reset the SCSI bus.
*
- * Returns : SCSI_RESET_WAKEUP
+ * Returns : SUCCESS
*
*/
@@ -2945,15 +2959,15 @@
*/
if ((cmd = connected)) {
- ABRT_PRINTK("scsi%d: reset aborted a connected command\n", H_NO(cmd));
- cmd->result = (cmd->result & 0xffff) | (DID_RESET << 16);
+ ABRT_PRINTK("scsi%d: reset aborted a connected command, calling
scsi_done() ...\n", H_NO(cmd));
+ cmd->result = (DID_RESET << 16);
cmd->scsi_done( cmd );
}
for (i = 0; (cmd = disconnected_queue); ++i) {
disconnected_queue = NEXT(cmd);
NEXT(cmd) = NULL;
- cmd->result = (cmd->result & 0xffff) | (DID_RESET << 16);
+ cmd->result = (DID_RESET << 16);
cmd->scsi_done( cmd );
}
if (i > 0)
@@ -2970,7 +2984,7 @@
* the midlevel code that the reset was SUCCESSFUL, and there is no
* need to 'wake up' the commands by a request_sense
*/
- return SCSI_RESET_SUCCESS | SCSI_RESET_BUS_RESET;
+ return SUCCESS;
#else /* 1 */
/* MSch: new-style reset handling: let the mid-level do what it can */
@@ -3018,7 +3032,8 @@
local_irq_restore(flags);
/* we did no complete reset of all commands, so a wakeup is required */
- return SCSI_RESET_WAKEUP | SCSI_RESET_BUS_RESET;
+ /* The new error handler code implicitly does this for us anyway */
+ return SUCCESS;
#endif /* 1 */
}
Signed-Off-By: [EMAIL PROTECTED]
Michael
-
To unsubscribe from this list: send the line "unsubscribe linux-m68k" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html