This patch resolves a FIXME, which happens to make many of the VIA problems significantly less severe. The change defers unlinking any QH that has just become idle, since it may well be used again within a few milliseconds.
It reduces the number of unlink interrupts (IAA), and means fewer re-activation issues, such as newly queued TDs being wholly or partially processed before the QH gets de-activated. The VIA hardware seems to have some problems in those cases (which are extremely common on 2.4 kernels, and less so on 2.5 because usb-storage streams data much better now).
It also starts tracking the "lost IAA" errors that I see on at least one VT8235 motherboard. The count shows up in the "registers" sysfs file. It'd be good to know whether it's just my hardware that has this problem, or whether other folk also see it.
Please merge to Linus' tree.
- Dave
--- 1.28/drivers/usb/host/ehci-dbg.c Sun Feb 16 05:48:03 2003
+++ edited/drivers/usb/host/ehci-dbg.c Mon Feb 24 11:30:38 2003
@@ -605,8 +605,10 @@
}
#ifdef EHCI_STATS
- temp = snprintf (next, size, "irq normal %ld err %ld reclaim %ld\n",
- ehci->stats.normal, ehci->stats.error, ehci->stats.reclaim);
+ temp = snprintf (next, size,
+ "irq normal %ld err %ld reclaim %ld (lost %ld)\n",
+ ehci->stats.normal, ehci->stats.error, ehci->stats.reclaim,
+ ehci->stats.lost_iaa);
size -= temp;
next += temp;
--- 1.72/drivers/usb/host/ehci-hcd.c Sat Feb 15 02:00:06 2003
+++ edited/drivers/usb/host/ehci-hcd.c Mon Feb 24 11:30:38 2003
@@ -260,6 +260,7 @@
if (status & STS_IAA) {
ehci_vdbg (ehci, "lost IAA\n");
+ COUNT (ehci->stats.lost_iaa);
writel (STS_IAA, &ehci->regs->status);
ehci->reclaim_ready = 1;
}
@@ -547,8 +548,9 @@
ehci_mem_cleanup (ehci);
#ifdef EHCI_STATS
- ehci_dbg (ehci, "irq normal %ld err %ld reclaim %ld\n",
- ehci->stats.normal, ehci->stats.error, ehci->stats.reclaim);
+ ehci_dbg (ehci, "irq normal %ld err %ld reclaim %ld (lost %ld)\n",
+ ehci->stats.normal, ehci->stats.error, ehci->stats.reclaim,
+ ehci->stats.lost_iaa);
ehci_dbg (ehci, "complete %ld unlink %ld\n",
ehci->stats.complete, ehci->stats.unlink);
#endif
--- 1.69/drivers/usb/host/ehci-q.c Sun Feb 16 05:48:03 2003
+++ edited/drivers/usb/host/ehci-q.c Mon Feb 24 11:30:38 2003
@@ -800,6 +800,7 @@
&& !usb_pipecontrol (urb->pipe)) {
/* "never happens": drivers do stall cleanup right */
if (qh->qh_state != QH_STATE_IDLE
+ && !list_empty (&qh->qtd_list)
&& qh->qh_state != QH_STATE_COMPLETING)
ehci_warn (ehci, "clear toggle dev%d "
"ep%d%s: not idle\n",
@@ -1014,6 +1015,7 @@
scan_async (struct ehci_hcd *ehci, struct pt_regs *regs)
{
struct ehci_qh *qh;
+ int unlink_delay = 0;
if (!++(ehci->stamp))
ehci->stamp++;
@@ -1040,17 +1042,25 @@
}
}
- /* unlink idle entries, reducing HC PCI usage as
- * well as HCD schedule-scanning costs.
- *
- * FIXME don't unlink idle entries so quickly; it
- * can penalize (common) half duplex protocols.
+ /* unlink idle entries, reducing HC PCI usage as well
+ * as HCD schedule-scanning costs. delay for any qh
+ * we just scanned, there's a not-unusual case that it
+ * doesn't stay idle for long.
+ * (plus, avoids some kind of re-activation race.)
*/
- if (list_empty (&qh->qtd_list) && !ehci->reclaim) {
- start_unlink_async (ehci, qh);
+ if (list_empty (&qh->qtd_list)) {
+ if (qh->stamp == ehci->stamp)
+ unlink_delay = 1;
+ else if (!ehci->reclaim) {
+ start_unlink_async (ehci, qh);
+ unlink_delay = 0;
+ }
}
qh = qh->qh_next.qh;
} while (qh);
}
+
+ if (unlink_delay && !timer_pending (&ehci->watchdog))
+ mod_timer (&ehci->watchdog, jiffies + EHCI_WATCHDOG_JIFFIES/2);
}
--- 1.27/drivers/usb/host/ehci.h Sat Feb 15 01:18:35 2003
+++ edited/drivers/usb/host/ehci.h Mon Feb 24 11:30:38 2003
@@ -27,6 +27,7 @@
unsigned long normal;
unsigned long error;
unsigned long reclaim;
+ unsigned long lost_iaa;
/* termination of urbs from core */
unsigned long complete;
