On Fri, 18 Feb 2005, Samuel Colin wrote:

> Hello list,
> it's me again. I report a new problem with the other disk (so the previous
> problem is still solved, don't worry).
> The kernel I used is a 2.6.11-rc4-bk6 with the following patches : 
> http://sjdcolin.free.fr/tmp/patches/
> 
> I did the tests with usb-storage debug and usb debug activated, the logs are
> here : 
> http://sjdcolin.free.fr/tmp/
> 
> The test itself : a fsck.ext3 (or ext2, depends) on the partition of the
> offending usb2-rated disk.
> logs-1 : fsck.ext2 on sda1, blocks in the middle (but not always at the same
> place)
> logs-2 : fsck.ext3 on sda5, blocks also while fscking
> logs-3-scsi : same as logs-2, but with scsi logging fully activated (all 7),
> as the problem might be related to scsi
> logs-4-uhci : same as logs-2 and logs-1, but with uhci instead of ehci (the
> fsck was successful).
> 
> The lists.tar.gz shows previous lsusb and lspci, it did not change much so I
> did not regenerate them. The problem might be only related to scsi handling
> of the firmware of the disk, but I prefer to post here to go the "right
> way", from the downhill possible causes to the uphill possible causes.
> I attached an excerpt of the logs where the errors first appear. After a
> quick googling, I had seen similar but not exactly identical error reports.
> 
> What is "interesting" is that the copy of many big files from another (usb)
> disk did not cause any error, a huge reading neither (for a md5sum of the
> copied files), while a simple fsck causes problems. Maybe fsck sends
> particular, disk-controller related commands, making me say that the
> problem could be located at the scsi level.
> 
> Any idea ?
> Regards,
> Samuel Colin

This looks like another device problem (not a SCSI problem).  I noticed
that "delay.patch" in your patches directory is meant for 2.6.10; did
you make the corresponding change for 2.6.11 by hand?  And is the udelay
length set to 110 instead of 100?  If either of those isn't true, it's the 
first thing you should try.

As a partial workaround, there is an experimental patch below.  It was 
written a couple of months ago, but it should still apply with some 
offsets to your kernel.  It won't prevent the errors from occurring, but 
it ought to do a better job of recovering from them.  (Technically, it 
tries a USB port reset before doing a class-specific device reset.)

By the way, you don't have to include all those different log files in 
your tar archives.  kern.log or syslog along would be enough; they each 
contain all the useful information.

Alan Stern



===== drivers/usb/storage/scsiglue.c 1.90 vs edited =====
--- 1.90/drivers/usb/storage/scsiglue.c 2004-11-22 13:42:02 -05:00
+++ edited/drivers/usb/storage/scsiglue.c       2004-12-17 11:27:47 -05:00
@@ -250,57 +250,29 @@
 
        /* lock the device pointers and do the reset */
        down(&(us->dev_semaphore));
-       if (test_bit(US_FLIDX_DISCONNECTING, &us->flags)) {
-               result = FAILED;
-               US_DEBUGP("No reset during disconnect\n");
-       } else
-               result = us->transport_reset(us);
+       result = us->transport_reset(us);
        up(&(us->dev_semaphore));
 
        /* lock the host for the return */
        scsi_lock(srb->device->host);
-       return result;
+       return result < 0 ? FAILED : SUCCESS;
 }
 
-/* This resets the device's USB port. */
-/* It refuses to work if there's more than one interface in
- * the device, so that other users are not affected. */
+/* Simulate a SCSI bus reset by resetting the device's USB port. */
 /* This is always called with scsi_lock(srb->host) held */
 static int bus_reset(struct scsi_cmnd *srb)
 {
        struct us_data *us = (struct us_data *)srb->device->host->hostdata[0];
-       int result, rc;
+       int result;
 
        US_DEBUGP("%s called\n", __FUNCTION__);
 
        scsi_unlock(srb->device->host);
 
-       /* The USB subsystem doesn't handle synchronisation between
-        * a device's several drivers. Therefore we reset only devices
-        * with just one interface, which we of course own. */
-
        down(&(us->dev_semaphore));
-       if (test_bit(US_FLIDX_DISCONNECTING, &us->flags)) {
-               result = -EIO;
-               US_DEBUGP("No reset during disconnect\n");
-       } else if (us->pusb_dev->actconfig->desc.bNumInterfaces != 1) {
-               result = -EBUSY;
-               US_DEBUGP("Refusing to reset a multi-interface device\n");
-       } else {
-               rc = usb_lock_device_for_reset(us->pusb_dev, us->pusb_intf);
-               if (rc < 0) {
-                       US_DEBUGP("unable to lock device for reset: %d\n", rc);
-                       result = rc;
-               } else {
-                       result = usb_reset_device(us->pusb_dev);
-                       if (rc)
-                               usb_unlock_device(us->pusb_dev);
-                       US_DEBUGP("usb_reset_device returns %d\n", result);
-               }
-       }
+       result = usb_stor_port_reset(us);
        up(&(us->dev_semaphore));
 
-       /* lock the host for the return */
        scsi_lock(srb->device->host);
        return result < 0 ? FAILED : SUCCESS;
 }
@@ -317,6 +289,14 @@
                for (i = 1; i < us->host->max_id; ++i)
                        scsi_report_device_reset(us->host, 0, i);
        }
+}
+
+/* Report a driver-initiated bus reset to the SCSI layer.
+ * Calling this for a SCSI-initiated reset is unnecessary but harmless.
+ * The caller must own the SCSI host lock. */
+void usb_stor_report_bus_reset(struct us_data *us)
+{
+       scsi_report_bus_reset(us->host, 0);
 }
 
 /***********************************************************************
===== drivers/usb/storage/scsiglue.h 1.13 vs edited =====
--- 1.13/drivers/usb/storage/scsiglue.h 2004-08-24 10:55:47 -04:00
+++ edited/drivers/usb/storage/scsiglue.h       2004-12-17 11:27:47 -05:00
@@ -47,6 +47,7 @@
 struct scsi_cmnd;
 
 extern void usb_stor_report_device_reset(struct us_data *us);
+extern void usb_stor_report_bus_reset(struct us_data *us);
 
 extern unsigned char usb_stor_sense_notready[18];
 extern unsigned char usb_stor_sense_invalidCDB[18];
===== drivers/usb/storage/transport.c 1.155 vs edited =====
--- 1.155/drivers/usb/storage/transport.c       2004-12-15 16:25:40 -05:00
+++ edited/drivers/usb/storage/transport.c      2004-12-17 11:27:47 -05:00
@@ -266,8 +266,9 @@
                NULL, 0, 3*HZ);
 
        /* reset the endpoint toggle */
-       usb_settoggle(us->pusb_dev, usb_pipeendpoint(pipe),
-               usb_pipeout(pipe), 0);
+       if (result >= 0)
+               usb_settoggle(us->pusb_dev, usb_pipeendpoint(pipe),
+                               usb_pipeout(pipe), 0);
 
        US_DEBUGP("%s: result = %d\n", __FUNCTION__, result);
        return result;
@@ -539,15 +540,15 @@
         */
        if (test_bit(US_FLIDX_TIMED_OUT, &us->flags)) {
                US_DEBUGP("-- command was aborted\n");
-               goto Handle_Abort;
+               srb->result = DID_ABORT << 16;
+               goto Handle_Errors;
        }
 
        /* if there is a transport error, reset and don't auto-sense */
        if (result == USB_STOR_TRANSPORT_ERROR) {
                US_DEBUGP("-- transport indicates error, resetting\n");
-               us->transport_reset(us);
                srb->result = DID_ERROR << 16;
-               return;
+               goto Handle_Errors;
        }
 
        /* if the transport provided its own sense data, don't auto-sense */
@@ -667,7 +668,8 @@
 
                if (test_bit(US_FLIDX_TIMED_OUT, &us->flags)) {
                        US_DEBUGP("-- auto-sense aborted\n");
-                       goto Handle_Abort;
+                       srb->result = DID_ABORT << 16;
+                       goto Handle_Errors;
                }
                if (temp_result != USB_STOR_TRANSPORT_GOOD) {
                        US_DEBUGP("-- auto-sense failure\n");
@@ -676,9 +678,9 @@
                         * multi-target device, since failure of an
                         * auto-sense is perfectly valid
                         */
-                       if (!(us->flags & US_FL_SCM_MULT_TARG))
-                               us->transport_reset(us);
                        srb->result = DID_ERROR << 16;
+                       if (!(us->flags & US_FL_SCM_MULT_TARG))
+                               goto Handle_Errors;
                        return;
                }
 
@@ -719,12 +721,28 @@
 
        return;
 
-       /* abort processing: the bulk-only transport requires a reset
-        * following an abort */
-  Handle_Abort:
-       srb->result = DID_ABORT << 16;
-       if (us->protocol == US_PR_BULK)
+       /* Error and abort processing: try to resynchronize with the device
+        * by issuing a port reset.  If that fails, try a class-specific
+        * device reset. */
+  Handle_Errors:
+
+       /* Let the SCSI layer know we are doing a reset, set the
+        * RESETTING bit, and clear the ABORTING bit so that the reset
+        * may proceed. */
+       scsi_lock(us->host);
+       usb_stor_report_bus_reset(us);
+       set_bit(US_FLIDX_RESETTING, &us->flags);
+       clear_bit(US_FLIDX_ABORTING, &us->flags);
+       scsi_unlock(us->host);
+
+       result = usb_stor_port_reset(us);
+       if (result < 0) {
+               scsi_lock(us->host);
+               usb_stor_report_device_reset(us);
+               scsi_unlock(us->host);
                us->transport_reset(us);
+       }
+       clear_bit(US_FLIDX_RESETTING, &us->flags);
 }
 
 /* Stop the current URB transfer */
@@ -1111,7 +1129,7 @@
  * It's handy that every transport mechanism uses the control endpoint for
  * resets.
  *
- * Basically, we send a reset with a 20-second timeout, so we don't get
+ * Basically, we send a reset with a 5-second timeout, so we don't get
  * jammed attempting to do the reset.
  */
 static int usb_stor_reset_common(struct us_data *us,
@@ -1120,28 +1138,18 @@
 {
        int result;
        int result2;
-       int rc = FAILED;
 
-       /* Let the SCSI layer know we are doing a reset, set the
-        * RESETTING bit, and clear the ABORTING bit so that the reset
-        * may proceed.
-        */
-       scsi_lock(us->host);
-       usb_stor_report_device_reset(us);
-       set_bit(US_FLIDX_RESETTING, &us->flags);
-       clear_bit(US_FLIDX_ABORTING, &us->flags);
-       scsi_unlock(us->host);
+       if (test_bit(US_FLIDX_DISCONNECTING, &us->flags)) {
+               US_DEBUGP("No reset during disconnect\n");
+               return -EIO;
+       }
 
-       /* A 20-second timeout may seem rather long, but a LaCie
-        * StudioDrive USB2 device takes 16+ seconds to get going
-        * following a powerup or USB attach event.
-        */
        result = usb_stor_control_msg(us, us->send_ctrl_pipe,
                        request, requesttype, value, index, data, size,
-                       20*HZ);
+                       5*HZ);
        if (result < 0) {
                US_DEBUGP("Soft reset failed: %d\n", result);
-               goto Done;
+               return result;
        }
 
        /* Give the device some time to recover from the reset,
@@ -1151,7 +1159,7 @@
                        HZ*6);
        if (test_bit(US_FLIDX_DISCONNECTING, &us->flags)) {
                US_DEBUGP("Reset interrupted by disconnect\n");
-               goto Done;
+               return -EIO;
        }
 
        US_DEBUGP("Soft reset: clearing bulk-in endpoint halt\n");
@@ -1160,17 +1168,14 @@
        US_DEBUGP("Soft reset: clearing bulk-out endpoint halt\n");
        result2 = usb_stor_clear_halt(us, us->send_bulk_pipe);
 
-       /* return a result code based on the result of the control message */
-       if (result < 0 || result2 < 0) {
+       /* return a result code based on the result of the clear-halts */
+       if (result >= 0)
+               result = result2;
+       if (result < 0)
                US_DEBUGP("Soft reset failed\n");
-               goto Done;
-       }
-       US_DEBUGP("Soft reset done\n");
-       rc = SUCCESS;
-
-  Done:
-       clear_bit(US_FLIDX_RESETTING, &us->flags);
-       return rc;
+       else
+               US_DEBUGP("Soft reset done\n");
+       return result;
 }
 
 /* This issues a CB[I] Reset to the device in question
@@ -1199,4 +1204,33 @@
        return usb_stor_reset_common(us, US_BULK_RESET_REQUEST, 
                                 USB_TYPE_CLASS | USB_RECIP_INTERFACE,
                                 0, us->ifnum, NULL, 0);
+}
+
+/* Issue a USB port reset to the device.  But don't do anything if
+ * there's more than one interface in the device, so that other users
+ * are not affected. */
+int usb_stor_port_reset(struct us_data *us)
+{
+       int result, rc;
+
+       if (test_bit(US_FLIDX_DISCONNECTING, &us->flags)) {
+               result = -EIO;
+               US_DEBUGP("No reset during disconnect\n");
+       } else if (us->pusb_dev->actconfig->desc.bNumInterfaces != 1) {
+               result = -EBUSY;
+               US_DEBUGP("Refusing to reset a multi-interface device\n");
+       } else {
+               result = rc =
+                       usb_lock_device_for_reset(us->pusb_dev, us->pusb_intf);
+               if (result < 0) {
+                       US_DEBUGP("unable to lock device for reset: %d\n",
+                                       result);
+               } else {
+                       result = usb_reset_device(us->pusb_dev);
+                       if (rc)
+                               usb_unlock_device(us->pusb_dev);
+                       US_DEBUGP("usb_reset_device returns %d\n", result);
+               }
+       }
+       return result;
 }
===== drivers/usb/storage/transport.h 1.44 vs edited =====
--- 1.44/drivers/usb/storage/transport.h        2004-10-11 13:52:09 -04:00
+++ edited/drivers/usb/storage/transport.h      2004-12-17 11:27:47 -05:00
@@ -181,4 +181,5 @@
 extern int usb_stor_bulk_transfer_sg(struct us_data *us, unsigned int pipe,
                void *buf, unsigned int length, int use_sg, int *residual);
 
+extern int usb_stor_port_reset(struct us_data *us);
 #endif



-------------------------------------------------------
SF email is sponsored by - The IT Product Guide
Read honest & candid reviews on hundreds of IT Products from real users.
Discover which products truly live up to the hype. Start reading now.
http://ads.osdn.com/?ad_id=6595&alloc_id=14396&op=click
_______________________________________________
linux-usb-devel@lists.sourceforge.net
To unsubscribe, use the last form field at:
https://lists.sourceforge.net/lists/listinfo/linux-usb-devel

Reply via email to