I'm sending this around for testing and comments, I think it'll
address a bug Martin uncovered using "usbtest" on 2.5.42. That
bug should also show up with usb-storage, on 2.5.45 or later; the
comments I'm after are more "does it break your 2.5 ehci setup"
than anything else. (Except for Martin, not many folk will have
seen this bug yet.)
What it does is give up on catching all the "race with HC" cases
when appending to a live QH, by switching to using a disabled
"dummy" TD at the end of all hardware queues. The HC won't cache
disabled TDs, so it just sees "always good" pointers: no races.
As a side benefit this also makes it safe to not irq on completion
of most TDs that are queued using the scatterlist calls, so it'll
be typical for one 64 KByte usb-storage request to mean just one
irq (or less!) even without tuning ehci irq latency (for the DATA
stage, that is).
It shouldn't make any difference to folk having problems on any
VIA hardware, but I suspect I know what makes that happen now
(so a separate patch will be forthcoming).
The patch should apply fine on 2.5.46, which didn't seem to change
USB (except indirectly through "sysfs" and friends).
- Dave
--- ./drivers/usb-dist/host/ehci.h Fri Sep 27 17:35:32 2002
+++ ./drivers/usb/host/ehci.h Tue Nov 5 07:26:23 2002
@@ -31,11 +31,6 @@
/* termination of urbs from core */
unsigned long complete;
unsigned long unlink;
-
- /* qhs patched to recover from td queueing race
- * (can avoid by using 'dummy td', allowing fewer irqs)
- */
- unsigned long qpatch;
};
/* ehci_hcd->lock guards shared data against other CPUs:
@@ -311,6 +306,7 @@
dma_addr_t qh_dma; /* address of qh */
union ehci_shadow qh_next; /* ptr to qh; or periodic */
struct list_head qtd_list; /* sw qtd list */
+ struct ehci_qtd *dummy;
atomic_t refcount;
--- ./drivers/usb-dist/host/ehci-hcd.c Wed Oct 30 20:52:12 2002
+++ ./drivers/usb/host/ehci-hcd.c Tue Nov 5 07:26:23 2002
@@ -494,8 +494,8 @@
#ifdef EHCI_STATS
dbg ("irq normal %ld err %ld reclaim %ld",
ehci->stats.normal, ehci->stats.error, ehci->stats.reclaim);
- dbg ("complete %ld unlink %ld qpatch %ld",
- ehci->stats.complete, ehci->stats.unlink, ehci->stats.qpatch);
+ dbg ("complete %ld unlink %ld",
+ ehci->stats.complete, ehci->stats.unlink);
#endif
dbg_status (ehci, "ehci_stop completed", readl (&ehci->regs->status));
--- ./drivers/usb-dist/host/ehci-q.c Wed Oct 30 20:52:12 2002
+++ ./drivers/usb/host/ehci-q.c Tue Nov 5 08:58:27 2002
@@ -85,7 +85,7 @@
/* update halted (but potentially linked) qh */
-static inline void qh_update (struct ehci_qh *qh, struct ehci_qtd *qtd)
+static void qh_update (struct ehci_qh *qh, struct ehci_qtd *qtd)
{
qh->hw_current = 0;
qh->hw_qtd_next = QTD_NEXT (qtd->qtd_dma);
@@ -221,17 +221,6 @@
struct urb *urb = qtd->urb;
u32 token = 0;
- /* hc's on-chip qh overlay cache can overwrite our idea of
- * next qtd ptrs, if we appended a qtd while the queue was
- * advancing. (because we don't use dummy qtds.)
- */
- if (cpu_to_le32 (qtd->qtd_dma) == qh->hw_current
- && qtd->hw_next != qh->hw_qtd_next) {
- qh->hw_alt_next = qtd->hw_alt_next;
- qh->hw_qtd_next = qtd->hw_next;
- COUNT (ehci->stats.qpatch);
- }
-
/* clean up any state from previous QTD ...*/
if (last) {
if (likely (last->urb != urb)) {
@@ -495,8 +484,7 @@
}
/* by default, enable interrupt on urb completion */
-// ... do it always, unless we switch over to dummy qtds
-// if (likely (!(urb->transfer_flags & URB_NO_INTERRUPT)))
+ if (likely (!(urb->transfer_flags & URB_NO_INTERRUPT)))
qtd->hw_token |= __constant_cpu_to_le32 (QTD_IOC);
return head;
@@ -661,8 +649,15 @@
/* initialize sw and hw queues with these qtds */
if (!list_empty (qtd_list)) {
+ struct ehci_qtd *qtd;
+
+ /* hc's list view ends with dummy td; we might update it */
+ qtd = list_entry (qtd_list->prev, struct ehci_qtd, qtd_list);
+ qtd->hw_next = QTD_NEXT (qh->dummy->qtd_dma);
+
list_splice (qtd_list, &qh->qtd_list);
- qh_update (qh, list_entry (qtd_list->next, struct ehci_qtd, qtd_list));
+ qtd = list_entry (qtd_list->next, struct ehci_qtd, qtd_list);
+ qh_update (qh, qtd);
} else {
qh->hw_qtd_next = qh->hw_alt_next = EHCI_LIST_END;
}
@@ -767,39 +762,48 @@
/* append to tds already queued to this qh? */
if (unlikely (!list_empty (&qh->qtd_list) && qtd)) {
- struct ehci_qtd *last_qtd;
- u32 hw_next;
+ struct ehci_qtd *dummy;
+ dma_addr_t dma;
+ u32 token;
+
+ /* to avoid racing the HC, use the dummy td instead of
+ * the first td of our list (becomes new dummy). all
+ * these tds stay deactivated until we're done, when the
+ * HC is finally allowed to fetch the old dummy (4.10.2).
+ */
+ token = qtd->hw_token;
+ qtd->hw_token = 0;
+ dummy = qh->dummy;
+ // dbg ("swap td %p with dummy %p", qtd, dummy);
+
+ dma = dummy->qtd_dma;
+ *dummy = *qtd;
+ dummy->qtd_dma = dma;
+ list_del (&qtd->qtd_list);
+ list_add (&dummy->qtd_list, qtd_list);
- /* update the last qtd's "next" pointer */
- // dbg_qh ("non-empty qh", ehci, qh);
- last_qtd = list_entry (qh->qtd_list.prev,
- struct ehci_qtd, qtd_list);
- hw_next = QTD_NEXT (qtd->qtd_dma);
- last_qtd->hw_next = hw_next;
+ ehci_qtd_init (qtd, qtd->qtd_dma);
+ qh->dummy = qtd;
- /* previous urb allows short rx? maybe optimize. */
- if (!(last_qtd->urb->transfer_flags & URB_SHORT_NOT_OK)
- && (epnum & 0x10)) {
- // only the last QTD for now
- last_qtd->hw_alt_next = hw_next;
- }
+ /* hc must see the new dummy at list end */
+ qtd = list_entry (qh->qtd_list.prev,
+ struct ehci_qtd, qtd_list);
+ qtd->hw_next = QTD_NEXT (dma);
- /* qh_completions() may need to patch the qh overlay if
- * the hc was advancing this queue while we appended.
- * we know it can: last_qtd->hw_token has IOC set.
- *
- * or: use a dummy td (so the overlay gets the next td
- * only when we set its active bit); fewer irqs.
- */
+ /* let the hc process these next qtds */
wmb ();
+ dummy->hw_token = token;
/* no URB queued */
} else {
- // dbg_qh ("empty qh", ehci, qh);
+ struct ehci_qtd *last_qtd;
- /* NOTE: we already canceled any queued URBs
- * when the endpoint halted.
- */
+ /* make sure hc sees current dummy at the end */
+ last_qtd = list_entry (qtd_list->prev,
+ struct ehci_qtd, qtd_list);
+ last_qtd->hw_next = QTD_NEXT (qh->dummy->qtd_dma);
+
+ // dbg_qh ("empty qh", ehci, qh);
/* usb_clear_halt() means qh data toggle gets reset */
if (unlikely (!usb_gettoggle (urb->dev,
--- ./drivers/usb-dist/host/ehci-mem.c Mon Oct 7 18:19:12 2002
+++ ./drivers/usb/host/ehci-mem.c Tue Nov 5 09:43:33 2002
@@ -58,19 +58,23 @@
/* Allocate the key transfer structures from the previously allocated pool */
+static void ehci_qtd_init (struct ehci_qtd *qtd, dma_addr_t dma)
+{
+ memset (qtd, 0, sizeof *qtd);
+ qtd->qtd_dma = dma;
+ qtd->hw_next = EHCI_LIST_END;
+ qtd->hw_alt_next = EHCI_LIST_END;
+ INIT_LIST_HEAD (&qtd->qtd_list);
+}
+
static struct ehci_qtd *ehci_qtd_alloc (struct ehci_hcd *ehci, int flags)
{
struct ehci_qtd *qtd;
dma_addr_t dma;
qtd = pci_pool_alloc (ehci->qtd_pool, flags, &dma);
- if (qtd != 0) {
- memset (qtd, 0, sizeof *qtd);
- qtd->qtd_dma = dma;
- qtd->hw_next = EHCI_LIST_END;
- qtd->hw_alt_next = EHCI_LIST_END;
- INIT_LIST_HEAD (&qtd->qtd_list);
- }
+ if (qtd != 0)
+ ehci_qtd_init (qtd, dma);
return qtd;
}
@@ -87,12 +91,21 @@
qh = (struct ehci_qh *)
pci_pool_alloc (ehci->qh_pool, flags, &dma);
- if (qh) {
- memset (qh, 0, sizeof *qh);
- atomic_set (&qh->refcount, 1);
- qh->qh_dma = dma;
- // INIT_LIST_HEAD (&qh->qh_list);
- INIT_LIST_HEAD (&qh->qtd_list);
+ if (!qh)
+ return qh;
+
+ memset (qh, 0, sizeof *qh);
+ atomic_set (&qh->refcount, 1);
+ qh->qh_dma = dma;
+ // INIT_LIST_HEAD (&qh->qh_list);
+ INIT_LIST_HEAD (&qh->qtd_list);
+
+ /* dummy td enables safe urb queuing */
+ qh->dummy = ehci_qtd_alloc (ehci, flags);
+ if (qh->dummy == 0) {
+ dbg ("no dummy td");
+ pci_pool_free (ehci->qh_pool, qh, qh->qh_dma);
+ qh = 0;
}
return qh;
}
@@ -115,6 +128,8 @@
dbg ("unused qh not empty!");
BUG ();
}
+ if (qh->dummy)
+ ehci_qtd_free (ehci, qh->dummy);
pci_pool_free (ehci->qh_pool, qh, qh->qh_dma);
}
--- ./drivers/usb-dist/host/ehci-dbg.c Fri Sep 27 17:35:32 2002
+++ ./drivers/usb/host/ehci-dbg.c Tue Nov 5 07:58:31 2002
@@ -272,6 +272,7 @@
static void qh_lines (struct ehci_qh *qh, char **nextp, unsigned *sizep)
{
u32 scratch;
+ u32 hw_curr;
struct list_head *entry;
struct ehci_qtd *td;
unsigned temp;
@@ -279,20 +280,22 @@
char *next = *nextp;
scratch = cpu_to_le32p (&qh->hw_info1);
- temp = snprintf (next, size, "qh/%p dev%d %cs ep%d %08x %08x",
+ hw_curr = cpu_to_le32p (&qh->hw_current);
+ temp = snprintf (next, size, "qh/%p dev%d %cs ep%d %08x %08x (%08x %08x)",
qh, scratch & 0x007f,
speed_char (scratch),
(scratch >> 8) & 0x000f,
- scratch, cpu_to_le32p (&qh->hw_info2));
+ scratch, cpu_to_le32p (&qh->hw_info2),
+ hw_curr, cpu_to_le32p (&qh->hw_token));
size -= temp;
next += temp;
list_for_each (entry, &qh->qtd_list) {
- td = list_entry (entry, struct ehci_qtd,
- qtd_list);
+ td = list_entry (entry, struct ehci_qtd, qtd_list);
scratch = cpu_to_le32p (&td->hw_token);
temp = snprintf (next, size,
- "\n\ttd/%p %s len=%d %08x urb %p",
+ "\n\t%std/%p %s len=%d %08x urb %p",
+ (hw_curr == td->qtd_dma) ? "*" : "",
td, ({ char *tmp;
switch ((scratch>>8)&0x03) {
case 0: tmp = "out"; break;
@@ -552,8 +555,8 @@
size -= temp;
next += temp;
- temp = snprintf (next, size, "complete %ld unlink %ld qpatch %ld\n",
- ehci->stats.complete, ehci->stats.unlink, ehci->stats.qpatch);
+ temp = snprintf (next, size, "complete %ld unlink %ld\n",
+ ehci->stats.complete, ehci->stats.unlink);
size -= temp;
next += temp;
#endif