ChangeSet 1.2169, 2004/12/09 12:23:06-08:00, [EMAIL PROTECTED] [PATCH] USB: EHCI qh update race fix
This makes the EHCI driver stop trying to update a live QH ... it's not like OHCI, that can't be done safely because of a hardware race. The fix: - Unlinks the QH before updating it; only the tail can safely be updated "live", not the queue head. The async schedule (all control/bulk QHs) and periodic schedule (interrupt QH) work a bit differently ... high bandwidth transfers will hiccup. - Moves "update QH" and "clear toggle" logic into one new qh_refresh() routine, used in several places. The race shows readily enough under load with the right hardware. The controller silicon might be relatively slow, or maybe it's the bus that's slow/busy: Host Controller --- ---------- reads two TD pointers update two TD pointers wmb() activate QH reads rest of QH Net result is that the HC treated old TD pointers as valid, and things started misbehaving. Busy controllers would misbehave worse; some systems wouldn't notice more than a slowdown, especially with light USB loads. This affects behavior in two cases. The uncommon one is when an endpoint gets an error and halts. The more common one happens when the controller runs off the end of its queue and overlays an inactive "dummy" TD into the QH ... something the spec says shouldn't happen, but which more silicon seems to be doing. (Presumably to reduce DMA chatter.) Signed-off-by: David Brownell <[EMAIL PROTECTED]> Signed-off-by: Greg Kroah-Hartman <[EMAIL PROTECTED]> drivers/usb/host/ehci-q.c | 133 +++++++++++++++++++++++------------------- drivers/usb/host/ehci-sched.c | 5 - 2 files changed, 78 insertions(+), 60 deletions(-) diff -Nru a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c --- a/drivers/usb/host/ehci-q.c 2004-12-09 14:54:30 -08:00 +++ b/drivers/usb/host/ehci-q.c 2004-12-09 14:54:30 -08:00 @@ -83,19 +83,59 @@ /*-------------------------------------------------------------------------*/ -/* update halted (but potentially linked) qh */ - static inline void qh_update (struct ehci_hcd *ehci, struct ehci_qh *qh, struct ehci_qtd *qtd) { + /* writes to an active overlay are unsafe */ + BUG_ON(qh->qh_state != QH_STATE_IDLE); + qh->hw_qtd_next = QTD_NEXT (qtd->qtd_dma); qh->hw_alt_next = EHCI_LIST_END; + /* Except for control endpoints, we make hardware maintain data + * toggle (like OHCI) ... here (re)initialize the toggle in the QH, + * and set the pseudo-toggle in udev. Only usb_clear_halt() will + * ever clear it. + */ + if (!(qh->hw_info1 & cpu_to_le32(1 << 14))) { + unsigned is_out, epnum; + + is_out = !(qtd->hw_token & cpu_to_le32(1 << 8)); + epnum = (le32_to_cpup(&qh->hw_info1) >> 8) & 0x0f; + if (unlikely (!usb_gettoggle (qh->dev, epnum, is_out))) { + qh->hw_token &= ~__constant_cpu_to_le32 (QTD_TOGGLE); + usb_settoggle (qh->dev, epnum, is_out, 1); + } + } + /* HC must see latest qtd and qh data before we clear ACTIVE+HALT */ wmb (); qh->hw_token &= __constant_cpu_to_le32 (QTD_TOGGLE | QTD_STS_PING); } +/* if it weren't for a common silicon quirk (writing the dummy into the qh + * overlay, so qh->hw_token wrongly becomes inactive/halted), only fault + * recovery (including urb dequeue) would need software changes to a QH... + */ +static void +qh_refresh (struct ehci_hcd *ehci, struct ehci_qh *qh) +{ + struct ehci_qtd *qtd; + + if (list_empty (&qh->qtd_list)) + qtd = qh->dummy; + else { + qtd = list_entry (qh->qtd_list.next, + struct ehci_qtd, qtd_list); + /* first qtd may already be partially processed */ + if (cpu_to_le32 (qtd->qtd_dma) == qh->hw_current) + qtd = NULL; + } + + if (qtd) + qh_update (ehci, qh, qtd); +} + /*-------------------------------------------------------------------------*/ static void qtd_copy_status ( @@ -226,6 +266,11 @@ spin_lock (&ehci->lock); } +static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh); + +static void intr_deschedule (struct ehci_hcd *ehci, + struct ehci_qh *qh, int wait); +static int qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh); /* * Process and free completed qtds for a qh, returning URBs to drivers. @@ -369,21 +414,27 @@ /* restore original state; caller must unlink or relink */ qh->qh_state = state; - /* update qh after fault cleanup */ - if (unlikely (stopped != 0) - /* some EHCI 0.95 impls will overlay dummy qtds */ - || qh->hw_qtd_next == EHCI_LIST_END) { - if (list_empty (&qh->qtd_list)) - end = qh->dummy; - else { - end = list_entry (qh->qtd_list.next, - struct ehci_qtd, qtd_list); - /* first qtd may already be partially processed */ - if (cpu_to_le32 (end->qtd_dma) == qh->hw_current) - end = NULL; + /* be sure the hardware's done with the qh before refreshing + * it after fault cleanup, or recovering from silicon wrongly + * overlaying the dummy qtd (which reduces DMA chatter). + */ + if (stopped != 0 || qh->hw_qtd_next == EHCI_LIST_END) { + switch (state) { + case QH_STATE_IDLE: + qh_refresh(ehci, qh); + break; + case QH_STATE_LINKED: + /* should be rare for periodic transfers, + * except maybe high bandwidth ... + */ + if (qh->period) { + intr_deschedule (ehci, qh, 1); + (void) qh_schedule (ehci, qh); + } else + start_unlink_async (ehci, qh); + break; + /* otherwise, unlink already started */ } - if (end) - qh_update (ehci, qh, end); } return count; @@ -557,21 +608,6 @@ /*-------------------------------------------------------------------------*/ -/* - * Hardware maintains data toggle (like OHCI) ... here we (re)initialize - * the hardware data toggle in the QH, and set the pseudo-toggle in udev - * so we can see if usb_clear_halt() was called. NOP for control, since - * we set up qh->hw_info1 to always use the QTD toggle bits. - */ -static inline void -clear_toggle (struct usb_device *udev, int ep, int is_out, struct ehci_qh *qh) -{ - vdbg ("clear toggle, dev %d ep 0x%x-%s", - udev->devnum, ep, is_out ? "out" : "in"); - qh->hw_token &= ~__constant_cpu_to_le32 (QTD_TOGGLE); - usb_settoggle (udev, ep, is_out, 1); -} - // Would be best to create all qh's from config descriptors, // when each interface/altsetting is established. Unlink // any previous qh and cancel its urbs first; endpoints are @@ -651,11 +687,11 @@ qh->period = urb->interval; } - - /* support for tt scheduling */ - qh->dev = usb_get_dev (urb->dev); } + /* support for tt scheduling, and access to toggles */ + qh->dev = usb_get_dev (urb->dev); + /* using TT? */ switch (urb->dev->speed) { case USB_SPEED_LOW: @@ -715,8 +751,8 @@ qh->qh_state = QH_STATE_IDLE; qh->hw_info1 = cpu_to_le32 (info1); qh->hw_info2 = cpu_to_le32 (info2); - qh_update (ehci, qh, qh->dummy); usb_settoggle (urb->dev, usb_pipeendpoint (urb->pipe), !is_input, 1); + qh_refresh (ehci, qh); return qh; } @@ -745,7 +781,9 @@ } } - qh->hw_token &= ~HALT_BIT; + /* clear halt and/or toggle; and maybe recover from silicon quirk */ + if (qh->qh_state == QH_STATE_IDLE) + qh_refresh (ehci, qh); /* splice right after start */ qh->qh_next = head->qh_next; @@ -820,27 +858,8 @@ qh->hw_info1 &= ~QH_ADDR_MASK; } - /* usb_clear_halt() means qh data toggle gets reset */ - if (unlikely (!usb_gettoggle (urb->dev, - (epnum & 0x0f), !(epnum & 0x10))) - && !usb_pipecontrol (urb->pipe)) { - /* "never happens": drivers do stall cleanup right */ - if (qh->qh_state != QH_STATE_IDLE - && !list_empty (&qh->qtd_list) - && qh->qh_state != QH_STATE_COMPLETING) - ehci_warn (ehci, "clear toggle dev%d " - "ep%d%s: not idle\n", - usb_pipedevice (urb->pipe), - epnum & 0x0f, - usb_pipein (urb->pipe) - ? "in" : "out"); - /* else we know this overlay write is safe */ - clear_toggle (urb->dev, - epnum & 0x0f, !(epnum & 0x10), qh); - } - /* just one way to queue requests: swap with the dummy qtd. - * only hc or qh_completions() usually modify the overlay. + * only hc or qh_refresh() ever modify the overlay. */ if (likely (qtd != 0)) { struct ehci_qtd *dummy; @@ -935,8 +954,6 @@ /*-------------------------------------------------------------------------*/ /* the async qh for the qtds being reclaimed are now unlinked from the HC */ - -static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh); static void end_unlink_async (struct ehci_hcd *ehci, struct pt_regs *regs) { diff -Nru a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c --- a/drivers/usb/host/ehci-sched.c 2004-12-09 14:54:30 -08:00 +++ b/drivers/usb/host/ehci-sched.c 2004-12-09 14:54:30 -08:00 @@ -325,7 +325,7 @@ status = disable_periodic (ehci); else { status = 0; - vdbg ("periodic schedule still enabled"); + ehci_vdbg (ehci, "periodic schedule still enabled\n"); } /* @@ -342,7 +342,7 @@ * the race is very short. then if qh also isn't * rescheduled soon, it won't matter. otherwise... */ - vdbg ("intr_deschedule..."); + ehci_vdbg (ehci, "intr_deschedule...\n"); } } else qh->hw_next = EHCI_LIST_END; @@ -450,6 +450,7 @@ __le32 c_mask; unsigned frame; /* 0..(qh->period - 1), or NO_FRAME */ + qh_refresh(ehci, qh); qh->hw_next = EHCI_LIST_END; frame = qh->start; ------------------------------------------------------- SF email is sponsored by - The IT Product Guide Read honest & candid reviews on hundreds of IT Products from real users. Discover which products truly live up to the hype. Start reading now. http://productguide.itmanagersjournal.com/ _______________________________________________ [EMAIL PROTECTED] To unsubscribe, use the last form field at: https://lists.sourceforge.net/lists/listinfo/linux-usb-devel