> > Throw me a few more hints, please. I already knew about error handling
> > being changed; what should I use for sleep_on(), and will the driver still
> > be able to release the ST-DMA lock on request done if queue management is
> > done by the midlevel?
>
> On second thought - the sleep_on we can perhaps do away with by cutting
> out the whole lock wait scheme (lock contention between SCSI, IDE and
> floppy are the least of my concerns). Lock release is currently done by
> the low level driver after calls to scsi_done() which seems fine. Lock
> acquisition happens in queue_command() which may sleep, causing other
> commands to time out. The error handling code suffers from the same
> problem - the SCSI timer needs to be stopped while queueing commands and
> doing error handling. Is there a way to guarantee this?

I've looked at the SCSI situation in more detail, and updated the Atari
SCSI code to cope with the changes in 2.6 (scsi_error expects different
return codes; queuecommand is called from softirq context and cannot
sleep). I get no more lockups, but writing to a ZIP drive corrupts data
on the drive. It may just be a broken ZIP drive, who knows. I need to try
with a real SCSI disk...

Christian, please apply this on top of my other 2.6.18 patch.

--- linux-2.6.18-m68k-ms/drivers/scsi/atari_scsi.c      2006-10-19 14:23:32.000000000 +0200
+++ linux-2.6.18-m68k/drivers/scsi/atari_scsi.c 2006-12-31 12:28:08.895670559 +0100
@@ -67,12 +67,39 @@

 #include <linux/module.h>

-#define NDEBUG (0)
+#define NDEBUG_ARBITRATION     0x1
+#define NDEBUG_AUTOSENSE       0x2
+#define NDEBUG_DMA             0x4
+#define NDEBUG_HANDSHAKE       0x8
+#define NDEBUG_INFORMATION     0x10
+#define NDEBUG_INIT            0x20
+#define NDEBUG_INTR            0x40
+#define NDEBUG_LINKED          0x80
+#define NDEBUG_MAIN            0x100
+#define NDEBUG_NO_DATAOUT      0x200
+#define NDEBUG_NO_WRITE                0x400
+#define NDEBUG_PIO             0x800
+#define NDEBUG_PSEUDO_DMA      0x1000
+#define NDEBUG_QUEUES          0x2000
+#define NDEBUG_RESELECTION     0x4000
+#define NDEBUG_SELECTION       0x8000
+#define NDEBUG_USLEEP          0x10000
+#define NDEBUG_LAST_BYTE_SENT  0x20000
+#define NDEBUG_RESTART_SELECT  0x40000
+#define NDEBUG_EXTENDED                0x80000
+#define NDEBUG_C400_PREAD      0x100000
+#define NDEBUG_C400_PWRITE     0x200000
+#define NDEBUG_LISTS           0x400000

 #define NDEBUG_ABORT   0x800000
 #define NDEBUG_TAGS    0x1000000
 #define NDEBUG_MERGING 0x2000000

+#define NDEBUG_ANY             0xFFFFFFFFUL
+
+#define NDEBUG (0)
+//#define NDEBUG (NDEBUG_MAIN)
+
 #define AUTOSENSE
 /* For the Atari version, use only polled IO or REAL_DMA */
 #define        REAL_DMA
@@ -198,7 +225,7 @@
 static irqreturn_t scsi_falcon_intr( int irq, void *dummy, struct pt_regs *fp);
 static void falcon_release_lock_if_possible( struct NCR5380_hostdata *
                                              hostdata );
-static void falcon_get_lock( void );
+static int falcon_get_lock( void );
 #ifdef CONFIG_ATARI_SCSI_RESET_BOOT
 static void atari_scsi_reset_boot( void );
 #endif
@@ -506,6 +533,15 @@
  * again (but others waiting longer more probably will win).
  */

+/* MSch 20061228: in 2.6, the fairness wait appears to open a race with
+ * the IDE driver's use of the lock, resulting in scheduling_in_interrupt
+ * level BUG() messages.
+ * The low level queuecmd function now appears to be called from soft
+ * interrupt context (block queue task??) and cannot sleep on the lock
+ * anymore once IDE has stolen the lock.
+ * Can we return 'please retry later' to the block queue task or mid level??
+ * MSch 20061229: the race persists regardless ... leave it off for now.
+ */
 static void
 falcon_release_lock_if_possible( struct NCR5380_hostdata * hostdata )
 {
@@ -529,7 +565,9 @@
                }
                falcon_got_lock = 0;
                stdma_release();
+#if defined(FALCON_FAIRNESS_WAIT)
                wake_up( &falcon_fairness_wait );
+#endif
        }

        local_irq_restore(flags);
@@ -550,20 +588,37 @@
  * Complicated, complicated.... Sigh...
  */

-static void falcon_get_lock( void )
+/* MSch 20061229: atari_queue_command gets called from softirq context quite
+ * heavily in the 2.6 kernel series. Since atari_queue_command might need to
+ * sleep in order to grab the ST-DMA lock, I have modified falcon_get_lock to
+ * immediately return with error status if called in softirq context with the
+ * lock not currently held by the SCSI driver, and the ST-DMA locked by some
+ * other driver. atari_queue_command then returns SCSI_MLQUEUE_HOST_BUSY and
+ * prevents further commands from being issued.
+ */
+
+static int falcon_get_lock( void )
 {
        unsigned long flags;

-       if (IS_A_TT()) return;
+       if (IS_A_TT()) return 0;

        local_irq_save(flags);

+#if defined (FALCON_FAIRNESS_WAIT)
        while( !in_irq() && falcon_got_lock && stdma_others_waiting() )
                sleep_on( &falcon_fairness_wait );
+#endif

        while (!falcon_got_lock) {
                if (in_irq())
                        panic( "Falcon SCSI hasn't ST-DMA lock in interrupt" );
+               /* we may not sleep in soft interrupts neither, so bail out */
+               if (in_softirq() && stdma_islocked()) {
+                       printk(KERN_INFO "Falcon SCSI does not hold ST-DMA lock 
in softirq!\n" );
+                       local_irq_restore(flags);
+                       return 1;
+               }
                if (!falcon_trying_lock) {
                        falcon_trying_lock = 1;
                        stdma_lock(scsi_falcon_intr, NULL);
@@ -579,6 +634,8 @@
        local_irq_restore(flags);
        if (!falcon_got_lock)
                panic("Falcon SCSI: someone stole the lock :-(\n");
+
+       return 0;
 }


@@ -824,6 +881,8 @@
        struct NCR5380_hostdata *hostdata =
                (struct NCR5380_hostdata *)cmd->device->host->hostdata;

+       printk( "scsi%d: resetting the SCSI bus!\n", 
(cmd)->device->host->host_no);
+
        /* For doing the reset, SCSI interrupts must be disabled first,
         * since the 5380 raises its IRQ line while _RST is active and we
         * can't disable interrupts completely, since we need the timer.
@@ -853,8 +912,10 @@
        else {
                atari_turnon_irq( IRQ_MFP_FSCSI );
        }
-       if ((rv & SCSI_RESET_ACTION) == SCSI_RESET_SUCCESS)
+       if (rv == SUCCESS) {
                falcon_release_lock_if_possible(hostdata);
+       }
+       printk( "scsi%d: bus reset done!\n", (cmd)->device->host->host_no);

        return( rv );
 }
--- linux-2.6.18-m68k-ms/drivers/scsi/atari_NCR5380.c   2006-10-19 14:23:32.000000000 +0200
+++ linux-2.6.18-m68k/drivers/scsi/atari_NCR5380.c      2006-12-31 12:34:13.013137465 +0100
@@ -474,7 +474,8 @@
         virt_to_phys(page_address(cmd->SCp.buffer[1].page)+
                      cmd->SCp.buffer[1].offset) == endaddr; ) {
        MER_PRINTK("VTOP(%p) == %08lx -> merging\n",
-                  cmd->SCp.buffer[1].address, endaddr);
+                  
page_address(cmd->SCp.buffer[1].page)+cmd->SCp.buffer[1].offset,
+                  endaddr);
 #if (NDEBUG & NDEBUG_MERGING)
        ++cnt;
 #endif
@@ -987,17 +988,6 @@
 #endif

     /*
-     * We use the host_scribble field as a pointer to the next command
-     * in a queue
-     */
-
-    NEXT(cmd) = NULL;
-    cmd->scsi_done = done;
-
-    cmd->result = 0;
-
-
-    /*
      * Insert the cmd into the issue queue. Note that REQUEST SENSE
      * commands are added to the head of the queue since any command will
      * clear the contingent allegiance condition that exists and the
@@ -1018,10 +1008,31 @@
      * alter queues and touch the lock.
      */
     if (!IS_A_TT()) {
+         int rv;
+         /* MSch: since we get called from softirq context here, and cannot
+          * sleep safely, the return status of falcon_get_lock is now used to
+          * figure out if we could successfully lock, or need to bail out.
+          * Signal the midlevel we're unable to queue the command in this case.
+          */
        oldto = atari_scsi_update_timeout(cmd, 0);
-       falcon_get_lock();
+       rv = falcon_get_lock();
        atari_scsi_update_timeout(cmd, oldto);
+       if (rv) {
+         local_irq_restore(flags);
+         return SCSI_MLQUEUE_HOST_BUSY;
+        }
     }
+
+    /*
+     * We use the host_scribble field as a pointer to the next command
+     * in a queue
+     */
+
+    NEXT(cmd) = NULL;
+    cmd->scsi_done = done;
+
+    cmd->result = 0;
+
     if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
        LIST(cmd, hostdata->issue_queue);
        NEXT(cmd) = hostdata->issue_queue;
@@ -1045,10 +1056,13 @@
      * If we're not in an interrupt, we can call NCR5380_main()
      * unconditionally, because it cannot be already running.
      */
-    if (in_interrupt() || ((flags >> 8) & 7) >= 6)
+
+    /* MSch: in 2.6.19, we need to unconditionally use the task queue
+     * instead of directly starting main. Yet another side effect of
+     * the softirq business, I bet. */
+
        queue_main();
-    else
-       NCR5380_main(NULL);
+
     return 0;
 }

@@ -2670,7 +2684,7 @@
  *     host byte of the result field to, if zero DID_ABORTED is
  *     used.
  *
- * Returns : 0 - success, -1 on failure.
+ * Returns : SUCCESS - success, FAILED on failure.
  *
  * XXX - there is no way to abort the command that is currently
  *      connected, you have to wait for it to complete.  If this is
@@ -2740,11 +2754,12 @@
          local_irq_restore(flags);
          cmd->scsi_done(cmd);
          falcon_release_lock_if_possible( hostdata );
-         return SCSI_ABORT_SUCCESS;
+          return SUCCESS;
        } else {
+       /* Why is this not restoring IRQs?? */
 /*       local_irq_restore(flags); */
          printk("scsi%d: abort of connected command failed!\n", HOSTNO);
-         return SCSI_ABORT_ERROR;
+          return FAILED;
        }
    }
 #endif
@@ -2768,7 +2783,7 @@
             * yet... */
            tmp->scsi_done(tmp);
            falcon_release_lock_if_possible( hostdata );
-           return SCSI_ABORT_SUCCESS;
+           return SUCCESS;
        }

 /*
@@ -2785,7 +2800,7 @@
     if (hostdata->connected) {
        local_irq_restore(flags);
        ABRT_PRINTK("scsi%d: abort failed, command connected.\n", HOSTNO);
-        return SCSI_ABORT_SNOOZE;
+        return FAILED;
     }

 /*
@@ -2820,7 +2835,7 @@
            ABRT_PRINTK("scsi%d: aborting disconnected command.\n", HOSTNO);

             if (NCR5380_select (instance, cmd, (int) cmd->tag))
-               return SCSI_ABORT_BUSY;
+               return FAILED;

            ABRT_PRINTK("scsi%d: nexus reestablished.\n", HOSTNO);

@@ -2847,7 +2862,7 @@
                    local_irq_restore(flags);
                    tmp->scsi_done(tmp);
                    falcon_release_lock_if_possible( hostdata );
-                   return SCSI_ABORT_SUCCESS;
+                   return SUCCESS;
                }
        }

@@ -2871,10 +2886,9 @@
  */
     falcon_release_lock_if_possible( hostdata );

-    return SCSI_ABORT_NOT_RUNNING;
+    return SUCCESS;
 }

-
 /*
  * Function : int NCR5380_reset (Scsi_Cmnd *cmd)
  *
@@ -2880,7 +2894,7 @@
  *
  * Purpose : reset the SCSI bus.
  *
- * Returns : SCSI_RESET_WAKEUP
+ * Returns : SUCCESS
  *
  */

@@ -2945,15 +2959,15 @@
      */

     if ((cmd = connected)) {
-       ABRT_PRINTK("scsi%d: reset aborted a connected command\n", H_NO(cmd));
-       cmd->result = (cmd->result & 0xffff) | (DID_RESET << 16);
+       ABRT_PRINTK("scsi%d: reset aborted a connected command, calling 
scsi_done() ...\n", H_NO(cmd));
+       cmd->result = (DID_RESET << 16);
        cmd->scsi_done( cmd );
     }

     for (i = 0; (cmd = disconnected_queue); ++i) {
        disconnected_queue = NEXT(cmd);
        NEXT(cmd) = NULL;
-       cmd->result = (cmd->result & 0xffff) | (DID_RESET << 16);
+       cmd->result = (DID_RESET << 16);
        cmd->scsi_done( cmd );
     }
     if (i > 0)
@@ -2970,7 +2984,7 @@
      * the midlevel code that the reset was SUCCESSFUL, and there is no
      * need to 'wake up' the commands by a request_sense
      */
-    return SCSI_RESET_SUCCESS | SCSI_RESET_BUS_RESET;
+    return SUCCESS;
 #else /* 1 */

     /* MSch: new-style reset handling: let the mid-level do what it can */
@@ -3018,7 +3032,8 @@
     local_irq_restore(flags);

     /* we did no complete reset of all commands, so a wakeup is required */
-    return SCSI_RESET_WAKEUP | SCSI_RESET_BUS_RESET;
+    /* The new error handler code implicitly does this for us anyway */
+    return SUCCESS;
 #endif /* 1 */
 }

Signed-Off-By: [EMAIL PROTECTED]

        Michael
-
To unsubscribe from this list: send the line "unsubscribe linux-m68k" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to