I thought I'd send this around, although there's an annoying
lockup (most likely in non-ehci code, see below) that'll likely
keep most folk from using this with storage devices.  I did
some basic testing with USB 1.1 devices through USB 2.0 hubs,
which behaved just fine.  (Consistent with the lockup being
in some other code...)

Changes since the latest ehci patch:

    - Uses the async_next register correctly ... the register
      definition didn't mention it, but later in the spec are
      requirements that the value not be changed while the async
      schedule is enabled.  Resolves a vt8235 problem, and
      seemingly some vt6202 ones (plus who knows what else).

    - Updated td queueing to use a 'dummy td', getting rid of
      the need for some race workarounds and letting queued
      urbs eliminate most irqs.  Unlikely to matter for any
      drivers on 2.4, but usb-storage on 2.5 wins by this.

    - Some of the diagnostics were trimmed, and many now look
      like the 2.5 <linux/device.h> messages.

Problems I've noticed:

    - A curious hard lockup, new to me, that never shows on
      2.5 with the same (or much more strenuous) testing.
      It appears pretty easily ... and seemingly with "uhci"
      too, so I'm thinking it's related to scsi or storage.

      That lockup did _not_ appear in the 2.4.20pre6 testing
      that produced the "pre11" ehci patch.

    - There's still a cardbus eject mode that's trouble:
      physical eject without disconnect.  That cleanup
      needs to be done from the hcd framework by keventd,
      and the same applies in 2.5 (where I have part of a
      patch).  (The EHCI code behaves; other code goofs up.)

This goes on top of 2.4.20 with the pre11 patch, or on top
of the patches Greg just sent to Marcelo.

- Dave


--- ./drivers/usb-dist/hcd.c    Mon Dec  2 22:36:50 2002
+++ ./drivers/usb/hcd.c Sat Dec  7 06:06:48 2002
@@ -663,7 +663,8 @@
        hcd->driver = driver;
        hcd->description = driver->description;
        hcd->pdev = dev;
-       info ("%s @ %s, %s", hcd->description,  dev->slot_name, dev->name);
+       printk (KERN_INFO "%s %s: %s\n",
+                       hcd->description,  dev->slot_name, dev->name);
 
 #ifndef __sparc__
        sprintf (buf, "%d", dev->irq);
@@ -682,7 +683,8 @@
 
        hcd->regs = base;
        hcd->region = region;
-       info ("irq %s, %s %p", bufp,
+       printk (KERN_INFO "%s %s: irq %s, %s %p\n",
+               hcd->description,  dev->slot_name, bufp,
                (driver->flags & HCD_MEMORY) ? "pci mem" : "io base",
                base);
 
@@ -739,7 +741,8 @@
        hcd = pci_get_drvdata(dev);
        if (!hcd)
                return;
-       info ("remove: %s, state %x", hcd->bus->bus_name, hcd->state);
+       printk (KERN_INFO "%s %s: remove state %x\n",
+               hcd->description,  dev->slot_name, hcd->state);
 
        if (in_interrupt ()) BUG ();
 
@@ -817,7 +820,8 @@
        int                     retval;
 
        hcd = pci_get_drvdata(dev);
-       info ("suspend %s to state %d", hcd->bus->bus_name, state);
+       printk (KERN_INFO "%s %s: suspend to state %d\n",
+               hcd->description,  dev->slot_name, state);
 
        pci_save_state (dev, hcd->pci_state);
 
@@ -846,7 +850,8 @@
        int                     retval;
 
        hcd = pci_get_drvdata(dev);
-       info ("resume %s", hcd->bus->bus_name);
+       printk (KERN_INFO "%s %s: resume\n",
+               hcd->description,  dev->slot_name);
 
        /* guard against multiple resumes (APM bug?) */
        atomic_inc (&hcd->resume_count);
@@ -1466,10 +1471,6 @@
        // completions for periodic urbs need hooks inside the HCD.
        // hcd_monitor_hook(MONITOR_URB_UPDATE, urb, dev)
 
-       if (urb->status)
-               dbg ("giveback urb %p status %d len %d",
-                       urb, urb->status, urb->actual_length);
-
        // NOTE:  2.5 does this if !URB_NO_DMA_MAP transfer flag
        if (usb_pipecontrol (urb->pipe))
                pci_unmap_single (hcd->pdev, urb->setup_dma,
--- ./drivers/usb-dist/hcd/ehci.h       Mon Dec  2 22:36:50 2002
+++ ./drivers/usb/hcd/ehci.h    Sat Dec  7 06:06:48 2002
@@ -31,11 +31,6 @@
        /* termination of urbs from core */
        unsigned long           complete;
        unsigned long           unlink;
-
-       /* qhs patched to recover from td queueing race
-        * (can avoid by using 'dummy td', allowing fewer irqs)
-        */
-       unsigned long           qpatch;
 };
 
 /* ehci_hcd->lock guards shared data against other CPUs:
@@ -311,6 +306,7 @@
        dma_addr_t              qh_dma;         /* address of qh */
        union ehci_shadow       qh_next;        /* ptr to qh; or periodic */
        struct list_head        qtd_list;       /* sw qtd list */
+       struct ehci_qtd         *dummy;
 
        atomic_t                refcount;
 
--- ./drivers/usb-dist/hcd/ehci-mem.c   Fri Nov 29 16:57:29 2002
+++ ./drivers/usb/hcd/ehci-mem.c        Sat Dec  7 06:06:48 2002
@@ -58,19 +58,23 @@
 
 /* Allocate the key transfer structures from the previously allocated pool */
 
+static void ehci_qtd_init (struct ehci_qtd *qtd, dma_addr_t dma)
+{
+       memset (qtd, 0, sizeof *qtd);
+       qtd->qtd_dma = dma;
+       qtd->hw_next = EHCI_LIST_END;
+       qtd->hw_alt_next = EHCI_LIST_END;
+       INIT_LIST_HEAD (&qtd->qtd_list);
+}
+
 static struct ehci_qtd *ehci_qtd_alloc (struct ehci_hcd *ehci, int flags)
 {
        struct ehci_qtd         *qtd;
        dma_addr_t              dma;
 
        qtd = pci_pool_alloc (ehci->qtd_pool, flags, &dma);
-       if (qtd != 0) {
-               memset (qtd, 0, sizeof *qtd);
-               qtd->qtd_dma = dma;
-               qtd->hw_next = EHCI_LIST_END;
-               qtd->hw_alt_next = EHCI_LIST_END;
-               INIT_LIST_HEAD (&qtd->qtd_list);
-       }
+       if (qtd != 0)
+               ehci_qtd_init (qtd, dma);
        return qtd;
 }
 
@@ -87,12 +91,21 @@
 
        qh = (struct ehci_qh *)
                pci_pool_alloc (ehci->qh_pool, flags, &dma);
-       if (qh) {
-               memset (qh, 0, sizeof *qh);
-               atomic_set (&qh->refcount, 1);
-               qh->qh_dma = dma;
-               // INIT_LIST_HEAD (&qh->qh_list);
-               INIT_LIST_HEAD (&qh->qtd_list);
+       if (!qh)
+               return qh;
+
+       memset (qh, 0, sizeof *qh);
+       atomic_set (&qh->refcount, 1);
+       qh->qh_dma = dma;
+       // INIT_LIST_HEAD (&qh->qh_list);
+       INIT_LIST_HEAD (&qh->qtd_list);
+
+       /* dummy td enables safe urb queuing */
+       qh->dummy = ehci_qtd_alloc (ehci, flags);
+       if (qh->dummy == 0) {
+               ehci_dbg (ehci, "no dummy td\n");
+               pci_pool_free (ehci->qh_pool, qh, qh->qh_dma);
+               qh = 0;
        }
        return qh;
 }
@@ -100,21 +113,21 @@
 /* to share a qh (cpu threads, or hc) */
 static inline struct ehci_qh *qh_get (/* ehci, */ struct ehci_qh *qh)
 {
-       // dbg ("get %p (%d++)", qh, qh->refcount.counter);
        atomic_inc (&qh->refcount);
        return qh;
 }
 
 static void qh_put (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
-       // dbg ("put %p (--%d)", qh, qh->refcount.counter);
        if (!atomic_dec_and_test (&qh->refcount))
                return;
        /* clean qtds first, and know this is not linked */
        if (!list_empty (&qh->qtd_list) || qh->qh_next.ptr) {
-               dbg ("unused qh not empty!");
+               ehci_dbg (ehci, "unused qh not empty!\n");
                BUG ();
        }
+       if (qh->dummy)
+               ehci_qtd_free (ehci, qh->dummy);
        pci_pool_free (ehci->qh_pool, qh, qh->qh_dma);
 }
 
@@ -127,6 +140,10 @@
 
 static void ehci_mem_cleanup (struct ehci_hcd *ehci)
 {
+       if (ehci->async)
+               qh_put (ehci, ehci->async);
+       ehci->async = 0;
+
        /* PCI consistent memory and pools */
        if (ehci->qtd_pool)
                pci_pool_destroy (ehci->qtd_pool);
@@ -169,21 +186,21 @@
                        4096 /* can't cross 4K */,
                        flags);
        if (!ehci->qtd_pool) {
-               dbg ("no qtd pool");
-               ehci_mem_cleanup (ehci);
-               return -ENOMEM;
+               goto fail;
        }
 
-       /* QH for control/bulk/intr transfers */
+       /* QHs for control/bulk/intr transfers */
        ehci->qh_pool = pci_pool_create ("ehci_qh", ehci->hcd.pdev,
                        sizeof (struct ehci_qh),
                        32 /* byte alignment (for hw parts) */,
                        4096 /* can't cross 4K */,
                        flags);
        if (!ehci->qh_pool) {
-               dbg ("no qh pool");
-               ehci_mem_cleanup (ehci);
-               return -ENOMEM;
+               goto fail;
+       }
+       ehci->async = ehci_qh_alloc (ehci, flags);
+       if (!ehci->async) {
+               goto fail;
        }
 
        /* ITD for high speed ISO transfers */
@@ -193,9 +210,7 @@
                        4096 /* can't cross 4K */,
                        flags);
        if (!ehci->itd_pool) {
-               dbg ("no itd pool");
-               ehci_mem_cleanup (ehci);
-               return -ENOMEM;
+               goto fail;
        }
 
        /* SITD for full/low speed split ISO transfers */
@@ -205,9 +220,7 @@
                        4096 /* can't cross 4K */,
                        flags);
        if (!ehci->sitd_pool) {
-               dbg ("no sitd pool");
-               ehci_mem_cleanup (ehci);
-               return -ENOMEM;
+               goto fail;
        }
 
        /* Hardware periodic table */
@@ -216,9 +229,7 @@
                        ehci->periodic_size * sizeof (u32),
                        &ehci->periodic_dma);
        if (ehci->periodic == 0) {
-               dbg ("no hw periodic table");
-               ehci_mem_cleanup (ehci);
-               return -ENOMEM;
+               goto fail;
        }
        for (i = 0; i < ehci->periodic_size; i++)
                ehci->periodic [i] = EHCI_LIST_END;
@@ -226,11 +237,14 @@
        /* software shadow of hardware table */
        ehci->pshadow = kmalloc (ehci->periodic_size * sizeof (void *), flags);
        if (ehci->pshadow == 0) {
-               dbg ("no shadow periodic table");
-               ehci_mem_cleanup (ehci);
-               return -ENOMEM;
+               goto fail;
        }
        memset (ehci->pshadow, 0, ehci->periodic_size * sizeof (void *));
 
        return 0;
+
+fail:
+       ehci_dbg (ehci, "couldn't init memory\n");
+       ehci_mem_cleanup (ehci);
+       return -ENOMEM;
 }
--- ./drivers/usb-dist/hcd/ehci-hcd.c   Mon Dec  2 22:36:50 2002
+++ ./drivers/usb/hcd/ehci-hcd.c        Sat Dec  7 10:47:02 2002
@@ -63,13 +63,14 @@
  * First was PCMCIA, like ISA; then CardBus, which is PCI.
  * Next comes "CardBay", using USB 2.0 signals.
  *
- * Contains additional contributions by: Brad Hards, Rory Bolt, and more.
+ * Contains additional contributions by Brad Hards, Rory Bolt, and others.
  * Special thanks to Intel and VIA for providing host controllers to
  * test this driver on, and Cypress (including In-System Design) for
  * providing early devices for those host controllers to talk to!
  *
  * HISTORY:
  *
+ * 2002-11-29  Correct handling for hw async_next register.
  * 2002-08-06  Handling for bulk and interrupt transfers is mostly shared;
  *     only scheduling is different, no arbitrary limitations.
  * 2002-07-25  Sanity check PCI reads, mostly for better cardbus support,
@@ -92,7 +93,7 @@
  * 2001-June   Works with usb-storage and NEC EHCI on 2.4
  */
 
-#define DRIVER_VERSION "2002-Sep-23"
+#define DRIVER_VERSION "2002-Nov-29"
 #define DRIVER_AUTHOR "David Brownell"
 #define DRIVER_DESC "USB 2.0 'Enhanced' Host Controller (EHCI) Driver"
 
@@ -116,7 +117,7 @@
 #define        EHCI_TUNE_MULT_TT       1
 
 #define EHCI_WATCHDOG_JIFFIES  (HZ/100)        /* arbitrary; ~10 msec */
-#define EHCI_ASYNC_JIFFIES     (HZ/3)          /* async idle timeout */
+#define EHCI_ASYNC_JIFFIES     (HZ/20)         /* async idle timeout */
 
 /* Initial IRQ latency:  lower than default */
 static int log2_irq_thresh = 0;                // 0 to 6
@@ -217,7 +218,7 @@
 
        /* wait for any schedule enables/disables to take effect */
        temp = 0;
-       if (ehci->async)
+       if (ehci->async->qh_next.qh)
                temp = STS_ASS;
        if (ehci->next_uframe != -1)
                temp |= STS_PSS;
@@ -262,7 +263,7 @@
        spin_lock_irqsave (&ehci->lock, flags);
        /* guard against lost IAA, which wedges everything */
        ehci_irq (&ehci->hcd);
-       /* unlink the last qh after it's idled a while */
+       /* stop async processing after it's idled a while */
        if (ehci->async_idle) {
                start_unlink_async (ehci, ehci->async);
                ehci->async_idle = 0;
@@ -289,12 +290,13 @@
                        pci_read_config_dword (ehci->hcd.pdev, where, &cap);
                } while ((cap & (1 << 16)) && msec);
                if (cap & (1 << 16)) {
-                       info ("BIOS handoff failed (%d, %04x)", where, cap);
+                       ehci_info (ehci,
+                               "BIOS handoff failed (%d, %04x)\n",
+                               where, cap);
                        return 1;
                } 
-               dbg ("BIOS handoff succeeded");
-       } else
-               dbg ("BIOS handoff not needed");
+               ehci_dbg (ehci, "BIOS handoff succeeded\n");
+       }
        return 0;
 }
 
@@ -325,14 +327,14 @@
                u32             cap;
 
                pci_read_config_dword (ehci->hcd.pdev, temp, &cap);
-               dbg ("capability %04x at %02x", cap, temp);
+               ehci_dbg (ehci, "capability %04x at %02x\n", cap, temp);
                switch (cap & 0xff) {
                case 1:                 /* BIOS/SMM/... handoff */
                        if (bios_handoff (ehci, temp, cap) != 0)
                                return -EOPNOTSUPP;
                        break;
                case 0:                 /* illegal reserved capability */
-                       warn ("illegal capability!");
+                       ehci_warn (ehci, "illegal capability!\n");
                        cap = 0;
                        /* FALLTHROUGH */
                default:                /* unknown */
@@ -362,7 +364,6 @@
        else                                    // N microframes cached
                ehci->i_thresh = 2 + HCC_ISOC_THRES (hcc_params);
 
-       ehci->async = 0;
        ehci->reclaim = 0;
        ehci->next_uframe = -1;
 
@@ -377,6 +378,21 @@
        writel (ehci->periodic_dma, &ehci->regs->frame_list);
 
        /*
+        * dedicate a qh for the async ring head, since we couldn't unlink
+        * a 'real' qh without stopping the async schedule [4.8].  use it
+        * as the 'reclamation list head' too.
+        */
+       ehci->async->qh_next.qh = 0;
+       ehci->async->hw_next = QH_NEXT (ehci->async->qh_dma);
+       ehci->async->hw_info1 = cpu_to_le32 (QH_HEAD);
+       ehci->async->hw_token = cpu_to_le32 (QTD_STS_HALT);
+       ehci->async->hw_qtd_next = EHCI_LIST_END;
+       ehci->async->qh_state = QH_STATE_LINKED;
+       ehci_qtd_free (ehci, ehci->async->dummy);
+       ehci->async->dummy = 0;
+       writel ((u32)ehci->async->qh_dma, &ehci->regs->async_next);
+
+       /*
         * hcc_params controls whether ehci->regs->segment must (!!!)
         * be used; it constrains QH/ITD/SITD and QTD locations.
         * pci_pool consistent memory always uses segment zero.
@@ -390,7 +406,8 @@
        if (HCC_64BIT_ADDR (hcc_params)) {
                writel (0, &ehci->regs->segment);
                if (!pci_set_dma_mask (ehci->hcd.pdev, 0xffffffffffffffffULL))
-                       info ("enabled 64bit PCI DMA (DAC)");
+                       ehci_info (ehci,
+                               "enabled 64bit PCI DMA (DAC)\n");
        }
 
        /* clear interrupt enables, set irq latency */
@@ -437,10 +454,10 @@
         /* PCI Serial Bus Release Number is at 0x60 offset */
        pci_read_config_byte (hcd->pdev, 0x60, &tempbyte);
        temp = readw (&ehci->caps->hci_version);
-       info ("USB %x.%x support enabled, EHCI rev %x.%02x, %s %s",
-             ((tempbyte & 0xf0)>>4), (tempbyte & 0x0f),
-              temp >> 8, temp & 0xff,
-              hcd_name, DRIVER_VERSION);
+       ehci_info (ehci,
+               "USB %x.%x enabled, EHCI %x.%02x, driver %s\n",
+               ((tempbyte & 0xf0)>>4), (tempbyte & 0x0f),
+               temp >> 8, temp & 0xff, DRIVER_VERSION);
 
        /*
         * From here on, khubd concurrently accesses the root
@@ -472,7 +489,7 @@
 {
        struct ehci_hcd         *ehci = hcd_to_ehci (hcd);
 
-       dbg ("%s: stop", hcd_to_bus (hcd)->bus_name);
+       ehci_dbg (ehci, "stop\n");
 
        /* no more interrupts ... */
        if (hcd->state == USB_STATE_RUNNING)
@@ -494,10 +511,10 @@
        ehci_mem_cleanup (ehci);
 
 #ifdef EHCI_STATS
-       dbg ("irq normal %ld err %ld reclaim %ld",
+       ehci_dbg (ehci, "irq normal %ld err %ld reclaim %ld\n",
                ehci->stats.normal, ehci->stats.error, ehci->stats.reclaim);
-       dbg ("complete %ld unlink %ld qpatch %ld",
-               ehci->stats.complete, ehci->stats.unlink, ehci->stats.qpatch);
+       ehci_dbg (ehci, "complete %ld unlink %ld\n",
+               ehci->stats.complete, ehci->stats.unlink);
 #endif
 
        dbg_status (ehci, "ehci_stop completed", readl (&ehci->regs->status));
@@ -740,8 +757,8 @@
        struct ehci_qh          *qh = (struct ehci_qh *) urb->hcpriv;
        unsigned long           flags;
 
-       dbg ("%s urb_dequeue %p qh %p state %d",
-               hcd_to_bus (hcd)->bus_name, urb, qh, qh->qh_state);
+       ehci_vdbg (ehci, "urb_dequeue %p qh %p state %d\n",
+               urb, qh, qh->qh_state);
 
        switch (usb_pipetype (urb->pipe)) {
        // case PIPE_CONTROL:
@@ -749,7 +766,7 @@
        default:
                spin_lock_irqsave (&ehci->lock, flags);
                if (ehci->reclaim) {
-                       dbg ("dq %p: reclaim = %p, %s",
+                       vdbg ("dq %p: reclaim = %p, %s",
                                qh, ehci->reclaim, RUN_CONTEXT);
                        if (qh == ehci->reclaim) {
                                /* unlinking qh for another queued urb? */
@@ -984,7 +1001,6 @@
 
 static int __init init (void) 
 {
-       dbg (DRIVER_INFO);
        dbg ("block sizes: qh %Zd qtd %Zd itd %Zd sitd %Zd",
                sizeof (struct ehci_qh), sizeof (struct ehci_qtd),
                sizeof (struct ehci_itd), sizeof (struct ehci_sitd));
--- ./drivers/usb-dist/hcd/ehci-q.c     Mon Dec  2 22:36:50 2002
+++ ./drivers/usb/hcd/ehci-q.c  Sat Dec  7 10:55:27 2002
@@ -85,7 +85,7 @@
 
 /* update halted (but potentially linked) qh */
 
-static inline void qh_update (struct ehci_qh *qh, struct ehci_qtd *qtd)
+static void qh_update (struct ehci_qh *qh, struct ehci_qtd *qtd)
 {
        qh->hw_current = 0;
        qh->hw_qtd_next = QTD_NEXT (qtd->qtd_dma);
@@ -130,8 +130,9 @@
                else    /* unknown */
                        urb->status = -EPROTO;
 
-               dbg ("ep %d-%s qtd token %08x --> status %d",
-                       /* devpath */
+               ehci_vdbg (ehci,
+                       "dev%d ep%d%s qtd token %08x --> status %d\n",
+                       usb_pipedev (urb->pipe),
                        usb_pipeendpoint (urb->pipe),
                        usb_pipein (urb->pipe) ? "in" : "out",
                        token, urb->status);
@@ -261,17 +262,6 @@
                                ehci_urb_done (ehci, last->urb);
                                count++;
                        }
-
-                       /* qh overlays can have HC's old cached copies of
-                        * next qtd ptrs, if an URB was queued afterwards.
-                        */
-                       if (cpu_to_le32 (last->qtd_dma) == qh->hw_current
-                                       && last->hw_next != qh->hw_qtd_next) {
-                               qh->hw_alt_next = last->hw_alt_next;
-                               qh->hw_qtd_next = last->hw_next;
-                               COUNT (ehci->stats.qpatch);
-                       }
-
                        ehci_qtd_free (ehci, last);
                        last = 0;
                }
@@ -694,8 +684,15 @@
 
        /* initialize sw and hw queues with these qtds */
        if (!list_empty (qtd_list)) {
+               struct ehci_qtd         *qtd;
+
+               /* hc's list view ends with dummy td; we might update it */
+               qtd = list_entry (qtd_list->prev, struct ehci_qtd, qtd_list);
+               qtd->hw_next = QTD_NEXT (qh->dummy->qtd_dma);
+
                list_splice (qtd_list, &qh->qtd_list);
-               qh_update (qh, list_entry (qtd_list->next, struct ehci_qtd, qtd_list));
+               qtd = list_entry (qtd_list->next, struct ehci_qtd, qtd_list);
+               qh_update (qh, qtd);
        } else {
                qh->hw_qtd_next = qh->hw_alt_next = EHCI_LIST_END;
        }
@@ -716,33 +713,33 @@
 static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
        u32             dma = QH_NEXT (qh->qh_dma);
-       struct ehci_qh  *q;
+       struct ehci_qh  *head;
 
-       if (unlikely (!(q = ehci->async))) {
+       /* (re)start the async schedule? */
+       head = ehci->async;
+       if (ehci->async_idle)
+               del_timer (&ehci->watchdog);
+       else if (!head->qh_next.qh) {
                u32     cmd = readl (&ehci->regs->command);
 
-               /* in case a clear of CMD_ASE didn't take yet */
-               (void) handshake (&ehci->regs->status, STS_ASS, 0, 150);
-
-               qh->hw_info1 |= __constant_cpu_to_le32 (QH_HEAD); /* [4.8] */
-               qh->qh_next.qh = qh;
-               qh->hw_next = dma;
-               wmb ();
-               ehci->async = qh;
-               writel ((u32)qh->qh_dma, &ehci->regs->async_next);
-               cmd |= CMD_ASE | CMD_RUN;
-               writel (cmd, &ehci->regs->command);
-               ehci->hcd.state = USB_STATE_RUNNING;
-               /* posted write need not be known to HC yet ... */
-       } else {
-               /* splice right after "start" of ring */
-               qh->hw_info1 &= ~__constant_cpu_to_le32 (QH_HEAD); /* [4.8] */
-               qh->qh_next = q->qh_next;
-               qh->hw_next = q->hw_next;
-               wmb ();
-               q->qh_next.qh = qh;
-               q->hw_next = dma;
+               if (!(cmd & CMD_ASE)) {
+                       /* in case a clear of CMD_ASE didn't take yet */
+                       (void) handshake (&ehci->regs->status, STS_ASS, 0, 150);
+                       cmd |= CMD_ASE | CMD_RUN;
+                       writel (cmd, &ehci->regs->command);
+                       ehci->hcd.state = USB_STATE_RUNNING;
+                       /* posted write need not be known to HC yet ... */
+               }
        }
+
+       /* splice right after start */
+       qh->qh_next = head->qh_next;
+       qh->hw_next = head->hw_next;
+       wmb ();
+
+       head->qh_next.qh = qh;
+       head->hw_next = dma;
+
        qh->qh_state = QH_STATE_LINKED;
        /* qtd completions reported later by interrupt */
 
@@ -777,54 +774,71 @@
                        qtd = list_entry (qtd_list->next, struct ehci_qtd,
                                        qtd_list);
 
-               /* maybe patch the qh used for set_address */
-               if (unlikely (epnum == 0
-                               && le32_to_cpu (qh->hw_info1 & 0x7f) == 0))
-                       qh->hw_info1 |= cpu_to_le32 (usb_pipedevice(urb->pipe));
+               /* control qh may need patching after enumeration */
+               if (unlikely (epnum == 0)) {
+                       /* set_address changes the address */
+                       if (le32_to_cpu (qh->hw_info1 & 0x7f) == 0)
+                               qh->hw_info1 |= cpu_to_le32 (
+                                               usb_pipedevice (urb->pipe));
+
+                       /* for full speed, ep0 maxpacket can grow */
+                       else if (!(qh->hw_info1 & cpu_to_le32 (0x3 << 12))) {
+                               u32     info, max;
+
+                               info = le32_to_cpu (qh->hw_info1);
+                               max = urb->dev->descriptor.bMaxPacketSize0;
+                               if (max > (0x07ff & (info >> 16))) {
+                                       info &= ~(0x07ff << 16);
+                                       info |= max << 16;
+                                       qh->hw_info1 = cpu_to_le32 (info);
+                               }
+                       }
+               }
 
                /* append to tds already queued to this qh? */
                if (unlikely (!list_empty (&qh->qtd_list) && qtd)) {
-                       struct ehci_qtd         *last_qtd;
-                       int                     short_rx = 0;
-                       u32                     hw_next;
+                       struct ehci_qtd         *dummy;
+                       dma_addr_t              dma;
+                       u32                     token;
+
+                       /* to avoid racing the HC, use the dummy td instead of
+                        * the first td of our list (becomes new dummy).  both
+                        * tds stay deactivated until we're done, when the
+                        * HC is allowed to fetch the old dummy (4.10.2).
+                        */
+                       token = qtd->hw_token;
+                       qtd->hw_token = 0;
+                       dummy = qh->dummy;
+                       // dbg ("swap td %p with dummy %p", qtd, dummy);
+
+                       dma = dummy->qtd_dma;
+                       *dummy = *qtd;
+                       dummy->qtd_dma = dma;
+                       list_del (&qtd->qtd_list);
+                       list_add (&dummy->qtd_list, qtd_list);
 
-                       /* update the last qtd's "next" pointer */
-                       // dbg_qh ("non-empty qh", ehci, qh);
-                       last_qtd = list_entry (qh->qtd_list.prev,
+                       ehci_qtd_init (qtd, qtd->qtd_dma);
+                       qh->dummy = qtd;
+
+                       /* hc must see the new dummy at list end */
+                       qtd = list_entry (qh->qtd_list.prev,
                                        struct ehci_qtd, qtd_list);
-                       hw_next = QTD_NEXT (qtd->qtd_dma);
-                       last_qtd->hw_next = hw_next;
+                       qtd->hw_next = QTD_NEXT (dma);
 
-                       /* previous urb allows short rx? maybe optimize. */
-                       if (!(last_qtd->urb->transfer_flags & URB_SHORT_NOT_OK)
-                                       && (epnum & 0x10)) {
-                               // only the last QTD for now
-                               last_qtd->hw_alt_next = hw_next;
-                               short_rx = 1;
-                       }
-
-                       /* Adjust any old copies in qh overlay too.
-                        * Interrupt code must cope with case of HC having it
-                        * cached, and clobbering these updates.
-                        * ... complicates getting rid of extra interrupts!
-                        * (Or:  use dummy td, so cache always stays valid.)
-                        */
-                       if (qh->hw_current == cpu_to_le32 (last_qtd->qtd_dma)) {
-                               wmb ();
-                               qh->hw_qtd_next = hw_next;
-                               if (short_rx)
-                                       qh->hw_alt_next = hw_next
-                                               | (qh->hw_alt_next & 0x1e);
-                               vdbg ("queue to qh %p, patch", qh);
-                       }
+                       /* let the hc process these next qtds */
+                       wmb ();
+                       dummy->hw_token = token;
 
                /* no URB queued */
                } else {
-                       // dbg_qh ("empty qh", ehci, qh);
+                       struct ehci_qtd         *last_qtd;
 
-                       /* NOTE: we already canceled any queued URBs
-                        * when the endpoint halted.
-                        */
+                       /* make sure hc sees current dummy at the end */
+                       last_qtd = list_entry (qtd_list->prev,
+                                       struct ehci_qtd, qtd_list);
+                       last_qtd->hw_next = QTD_NEXT (qh->dummy->qtd_dma);
+
+                       // dbg_qh ("empty qh", ehci, qh);
 
                        /* usb_clear_halt() means qh data toggle gets reset */
                        if (unlikely (!usb_gettoggle (urb->dev,
@@ -916,8 +930,19 @@
        if (!list_empty (&qh->qtd_list)
                        && HCD_IS_RUNNING (ehci->hcd.state))
                qh_link_async (ehci, qh);
-       else
+       else {
                qh_put (ehci, qh);              // refcount from async list
+
+               /* it's not free to turn the async schedule on/off, so
+                * leave it active but idle for a while once it empties.
+                */
+               if (!ehci->async->qh_next.qh
+                               && !timer_pending (&ehci->watchdog)) {
+                       ehci->async_idle = 1;
+                       mod_timer (&ehci->watchdog,
+                                       jiffies + EHCI_ASYNC_JIFFIES);
+               }
+       }
 }
 
 /* makes sure the async qh will become idle */
@@ -930,7 +955,6 @@
 
 #ifdef DEBUG
        if (ehci->reclaim
-                       || !ehci->async
                        || qh->qh_state != QH_STATE_LINKED
 #ifdef CONFIG_SMP
 // this macro lies except on SMP compiles
@@ -940,31 +964,20 @@
                BUG ();
 #endif
 
-       qh->qh_state = QH_STATE_UNLINK;
-       ehci->reclaim = qh = qh_get (qh);
-
-       // dbg_qh ("start unlink", ehci, qh);
-
-       /* Remove the last QH (qhead)?  Stop async schedule first. */
-       if (unlikely (qh == ehci->async && qh->qh_next.qh == qh)) {
+       /* stop async schedule right now? */
+       if (unlikely (qh == ehci->async)) {
                /* can't get here without STS_ASS set */
                if (ehci->hcd.state != USB_STATE_HALT) {
                        writel (cmd & ~CMD_ASE, &ehci->regs->command);
-                       (void) handshake (&ehci->regs->status, STS_ASS, 0, 150);
-#if 0
-                       // one VT8235 system wants to die with STS_FATAL
-                       // unless this qh is leaked here. others seem ok...
-                       qh = qh_get (qh);
-                       dbg_qh ("async/off", ehci, qh);
-#endif
+                       wmb ();
+                       // handshake later, if we need to
                }
-               qh->qh_next.qh = ehci->async = 0;
-
-               ehci->reclaim_ready = 1;
-               tasklet_schedule (&ehci->tasklet);
                return;
        } 
 
+       qh->qh_state = QH_STATE_UNLINK;
+       ehci->reclaim = qh = qh_get (qh);
+
        if (unlikely (ehci->hcd.state == USB_STATE_HALT)) {
                ehci->reclaim_ready = 1;
                tasklet_schedule (&ehci->tasklet);
@@ -972,13 +985,9 @@
        }
 
        prev = ehci->async;
-       while (prev->qh_next.qh != qh && prev->qh_next.qh != ehci->async)
+       while (prev->qh_next.qh != qh)
                prev = prev->qh_next.qh;
 
-       if (qh->hw_info1 & __constant_cpu_to_le32 (QH_HEAD)) {
-               ehci->async = prev;
-               prev->hw_info1 |= __constant_cpu_to_le32 (QH_HEAD);
-       }
        prev->hw_next = qh->hw_next;
        prev->qh_next = qh->qh_next;
        wmb ();
@@ -1000,7 +1009,7 @@
        unsigned                count;
 
 rescan:
-       qh = ehci->async;
+       qh = ehci->async->qh_next.qh;
        count = 0;
        if (likely (qh != 0)) {
                do {
@@ -1012,25 +1021,17 @@
                                /* concurrent unlink could happen here */
                                count += qh_completions (ehci, qh);
                                qh_put (ehci, qh);
+                               goto rescan;
                        }
 
                        /* unlink idle entries, reducing HC PCI usage as
-                        * well as HCD schedule-scanning costs.  removing
-                        * the last qh is deferred, since it's costly.
+                        * well as HCD schedule-scanning costs.
                         *
                         * FIXME don't unlink idle entries so quickly; it
                         * can penalize (common) half duplex protocols.
                         */
                        if (list_empty (&qh->qtd_list) && !ehci->reclaim) {
-                               if (qh->qh_next.qh != qh) {
-                                       // dbg ("irq/empty");
-                                       start_unlink_async (ehci, qh);
-                               } else if (!timer_pending (&ehci->watchdog)) {
-                                       /* can't use IAA for last entry */
-                                       ehci->async_idle = 1;
-                                       mod_timer (&ehci->watchdog,
-                                               jiffies + EHCI_ASYNC_JIFFIES);
-                               }
+                               start_unlink_async (ehci, qh);
                        }
 
                        /* keep latencies down: let any irqs in */
@@ -1042,8 +1043,6 @@
                        }
 
                        qh = qh->qh_next.qh;
-                       if (!qh)                /* unlinked? */
-                               goto rescan;
-               } while (qh != ehci->async);
+               } while (qh);
        }
 }
--- ./drivers/usb-dist/hcd/ehci-dbg.c   Mon Dec  2 22:36:50 2002
+++ ./drivers/usb/hcd/ehci-dbg.c        Sat Dec  7 06:06:48 2002
@@ -18,6 +18,29 @@
 
 /* this file is part of ehci-hcd.c */
 
+#ifdef DEBUG
+#define ehci_dbg(ehci, fmt, args...) \
+       printk(KERN_DEBUG "%s %s: " fmt, hcd_name, \
+               (ehci)->hcd.pdev->slot_name, ## args )
+#else
+#define ehci_dbg(ehci, fmt, args...) do { } while (0)
+#endif
+
+#ifdef EHCI_VERBOSE_DEBUG
+#define ehci_vdbg(ehci, fmt, args...) \
+       printk(KERN_DEBUG "%s %s: " fmt, hcd_name, \
+               (ehci)->hcd.pdev->slot_name, ## args )
+#else
+#define ehci_vdbg(ehci, fmt, args...) do { } while (0)
+#endif
+
+#define ehci_info(ehci, fmt, args...) \
+       printk(KERN_INFO "%s %s: " fmt, hcd_name, \
+               (ehci)->hcd.pdev->slot_name, ## args )
+#define ehci_warn(ehci, fmt, args...) \
+       printk(KERN_WARNING "%s %s: " fmt, hcd_name, \
+               (ehci)->hcd.pdev->slot_name, ## args )
+
 #ifdef EHCI_VERBOSE_DEBUG
 #      define vdbg dbg
 #else
@@ -34,7 +57,8 @@
 {
        u32     params = readl (&ehci->caps->hcs_params);
 
-       dbg ("%s hcs_params 0x%x dbg=%d%s cc=%d pcc=%d%s%s ports=%d",
+       ehci_dbg (ehci,
+               "%s hcs_params 0x%x dbg=%d%s cc=%d pcc=%d%s%s ports=%d\n",
                label, params,
                HCS_DEBUG_PORT (params),
                HCS_INDICATOR (params) ? " ind" : "",
@@ -56,9 +80,8 @@
                                ((i & 0x1) ? ((byte)&0xf) : ((byte>>4)&0xf)));
                        strcat(buf, tmp);
                }
-               dbg ("%s: %s portroute %s", 
-                       hcd_to_bus (&ehci->hcd)->bus_name, label,
-                       buf);
+               ehci_dbg (ehci, "%s portroute %s\n",
+                               label, buf);
        }
 }
 #else
@@ -77,19 +100,16 @@
 {
        u32     params = readl (&ehci->caps->hcc_params);
 
-       if (HCC_EXT_CAPS (params)) {
-               // EHCI 0.96 ... could interpret these (legacy?)
-               dbg ("%s extended capabilities at pci %2x",
-                       label, HCC_EXT_CAPS (params));
-       }
        if (HCC_ISOC_CACHE (params)) {
-               dbg ("%s hcc_params %04x caching frame %s%s%s",
+               ehci_dbg (ehci,
+                    "%s hcc_params %04x caching frame %s%s%s\n",
                     label, params,
                     HCC_PGM_FRAMELISTLEN (params) ? "256/512/1024" : "1024",
                     HCC_CANPARK (params) ? " park" : "",
                     HCC_64BIT_ADDR (params) ? " 64 bit addr" : "");
        } else {
-               dbg ("%s hcc_params %04x caching %d uframes %s%s%s",
+               ehci_dbg (ehci,
+                    "%s hcc_params %04x thresh %d uframes %s%s%s\n",
                     label,
                     params,
                     HCC_ISOC_THRES (params),
@@ -235,19 +255,19 @@
 #define dbg_status(ehci, label, status) { \
        char _buf [80]; \
        dbg_status_buf (_buf, sizeof _buf, label, status); \
-       dbg ("%s", _buf); \
+       ehci_dbg (ehci, "%s\n", _buf); \
 }
 
 #define dbg_cmd(ehci, label, command) { \
        char _buf [80]; \
        dbg_command_buf (_buf, sizeof _buf, label, command); \
-       dbg ("%s", _buf); \
+       ehci_dbg (ehci, "%s\n", _buf); \
 }
 
-#define dbg_port(hcd, label, port, status) { \
+#define dbg_port(ehci, label, port, status) { \
        char _buf [80]; \
        dbg_port_buf (_buf, sizeof _buf, label, port, status); \
-       dbg ("%s", _buf); \
+       ehci_dbg (ehci, "%s\n", _buf); \
 }
 
 /*-------------------------------------------------------------------------*/
@@ -272,6 +292,7 @@
 static void qh_lines (struct ehci_qh *qh, char **nextp, unsigned *sizep)
 {
        u32                     scratch;
+       u32                     hw_curr;
        struct list_head        *entry;
        struct ehci_qtd         *td;
        unsigned                temp;
@@ -279,20 +300,22 @@
        char                    *next = *nextp;
 
        scratch = cpu_to_le32p (&qh->hw_info1);
-       temp = snprintf (next, size, "qh/%p dev%d %cs ep%d %08x %08x",
+       hw_curr = cpu_to_le32p (&qh->hw_current);
+       temp = snprintf (next, size, "qh/%p dev%d %cs ep%d %08x %08x (%08x %08x)",
                        qh, scratch & 0x007f,
                        speed_char (scratch),
                        (scratch >> 8) & 0x000f,
-                       scratch, cpu_to_le32p (&qh->hw_info2));
+                       scratch, cpu_to_le32p (&qh->hw_info2),
+                       hw_curr, cpu_to_le32p (&qh->hw_token));
        size -= temp;
        next += temp;
 
        list_for_each (entry, &qh->qtd_list) {
-               td = list_entry (entry, struct ehci_qtd,
-                               qtd_list);
+               td = list_entry (entry, struct ehci_qtd, qtd_list);
                scratch = cpu_to_le32p (&td->hw_token);
                temp = snprintf (next, size,
-                               "\n\ttd/%p %s len=%d %08x urb %p",
+                               "\n\t%std/%p %s len=%d %08x urb %p",
+                               (hw_curr == td->qtd_dma) ? "*" : "",
                                td, ({ char *tmp;
                                 switch ((scratch>>8)&0x03) {
                                 case 0: tmp = "out"; break;
@@ -335,12 +358,8 @@
         * one QH per line, and TDs we know about
         */
        spin_lock_irqsave (&ehci->lock, flags);
-       if (ehci->async) {
-               qh = ehci->async;
-               do {
-                       qh_lines (qh, &next, &size);
-               } while ((qh = qh->qh_next.qh) != ehci->async);
-       }
+       for (qh = ehci->async->qh_next.qh; qh; qh = qh->qh_next.qh)
+               qh_lines (qh, &next, &size);
        if (ehci->reclaim) {
                temp = snprintf (next, size, "\nreclaim =\n");
                size -= temp;
@@ -552,8 +571,8 @@
        size -= temp;
        next += temp;
 
-       temp = snprintf (next, size, "complete %ld unlink %ld qpatch %ld\n",
-               ehci->stats.complete, ehci->stats.unlink, ehci->stats.qpatch);
+       temp = snprintf (next, size, "complete %ld unlink %ld\n",
+               ehci->stats.complete, ehci->stats.unlink);
        size -= temp;
        next += temp;
 #endif
--- ./drivers/usb-dist/hcd/ehci-hub.c   Mon Dec  2 22:36:50 2002
+++ ./drivers/usb/hcd/ehci-hub.c        Sat Dec  7 06:06:48 2002
@@ -40,18 +40,15 @@
 
        /* if reset finished and it's still not enabled -- handoff */
        if (!(port_status & PORT_PE)) {
-               dbg ("%s port %d full speed, give to companion, 0x%x",
-                       hcd_to_bus (&ehci->hcd)->bus_name,
-                       index + 1, port_status);
+               ehci_dbg (ehci, "port %d full speed --> companion\n",
+                       index + 1);
 
                // what happens if HCS_N_CC(params) == 0 ?
                port_status |= PORT_OWNER;
                writel (port_status, &ehci->regs->port_status [index]);
 
        } else
-               dbg ("%s port %d high speed",
-                       hcd_to_bus (&ehci->hcd)->bus_name,
-                       index + 1);
+               ehci_dbg (ehci, "port %d high speed\n", index + 1);
 
        return port_status;
 }
@@ -277,7 +274,7 @@
 #ifndef        EHCI_VERBOSE_DEBUG
        if (status & ~0xffff)   /* only if wPortChange is interesting */
 #endif
-               dbg_port (hcd, "GetStatus", wIndex + 1, temp);
+               dbg_port (ehci, "GetStatus", wIndex + 1, temp);
                // we "know" this alignment is good, caller used kmalloc()...
                *((u32 *) buf) = cpu_to_le32 (status);
                break;
@@ -313,14 +310,12 @@
                        /* line status bits may report this as low speed */
                        if ((temp & (PORT_PE|PORT_CONNECT)) == PORT_CONNECT
                                        && PORT_USB11 (temp)) {
-                               dbg ("%s port %d low speed, give to companion",
-                                       hcd_to_bus (&ehci->hcd)->bus_name,
+                               ehci_dbg (ehci,
+                                       "port %d low speed --> companion\n",
                                        wIndex + 1);
                                temp |= PORT_OWNER;
                        } else {
-                               vdbg ("%s port %d reset",
-                                       hcd_to_bus (&ehci->hcd)->bus_name,
-                                       wIndex + 1);
+                               ehci_vdbg (ehci, "port %d reset\n", wIndex + 1);
                                temp |= PORT_RESET;
                                temp &= ~PORT_PE;
 

Reply via email to