On Sun, Jul 19, 2020 at 02:12:21PM +0000, [email protected] wrote: > On 2020/07/19 11:25, Marcus Glocker wrote: > > On Sun, 19 Jul 2020 02:25:30 +0000 > > [email protected] wrote: > > > >> hi, > >> > >> It works on AMD Bolton xHCI (78141022), Intel PCH (1e318086), > >> and ASM1042 (10421b21). > >> I simply play with ffplay -f v4l2 /dev/video0 to test. > > > > If your cam supports MJPEG it's good to add '-input_format mjpeg' with > > higher resolutions like 1280x720, because that will generated varying > > image sizes, which hit the 64k memory boundary more often, and thus > > generate potentially more chained TDs. > > Thank you for useful information. > My webcam supprots at most 640x480, but 1024 bytes/frame x (2+1) maxburst > x 40 frames = 122880 bytes/xfer is enough to observe TD fragmentation. > > > >> At this moment it does not work on VL805, but I have no idea. > >> I'll investigate furthermore...
Did you already had a chance to figure out something regarding the issue you faced on your VL805 controller? I'm running the diff here since then on the Intel xHCI controller and couldn't re-produce any errors using different uvideo(4) and uaudio(4) devices. > > Oh dear :-) - Ok. > > > >> BTW I think xhci_ring_alloc should free ring dma buffer > >> if trb_processed[] allocation fails. > > > > Ah right, I'll add the usbd_dma_contig_free() there - Thanks. > > > >> > >> On 2020/07/18 14:47, Marcus Glocker wrote: > >>> Hey, > >>> > >>> On Sat, 18 Jul 2020 00:14:32 +0000 > >>> [email protected] wrote: > >>> > >>>> hi, > >>>> > >>>> On 2020/07/15 21:28, [email protected] wrote: > >>>>> hi, > >>>>> > >>>>> The patch works well on Intel PCH xhci, but not on AMD Bolton > >>>>> xHCI. I'll investigate this problem. > >>>> > >>>> Bolton xHCI sometimes raises following events. > >>>> (I added printf Completion Code.) > >>>> > >>>> #246 cc 13 remain 0 type 5 origlen 2048 frlengths[6] 2048 > >>>> #247 cc 1 remain 0 type 1 origlen 1024 frlengths[6] 3072 > >>>> > >>>> It seems this behavior violates the spec. 4.10.1.1. > >>>> When the remain of 1. TRB is 0, CC should be SUCCESS, not > >>>> SHORT_PKT, and HC should not raise interrupt. > >>>> The problem is how many length 2. TRB transferred. > >>>> If real CC of 1. TRB = SUCCESS, we should add 2. TRB length to > >>>> frlengths. If SHORT_PKT, we should keep frlengths as is. > >>>> Actually with the latter assumption I can see video smoothly w/o > >>>> errors. > >>>> > >>>> I changed your patch to skip adding to frlengths and actlen > >>>> if previous TRB is processed AND remain == 0. > >>> > >>> Interesting! Thanks for tracing this down. > >>> So in general we can say that for a chained TD, if the first TRB is > >>> SHORT, we can simply skip the second TRB. > >>> > >>> To make the code a bit more understandable, and use the new > >>> trb_processed flag to control this, I made another diff which keeps > >>> track whether the TRB was not processed, processed, or processed > >>> short. > >>> > >>> It also includes the malloc NULL check and replaces malloc by > >>> mallocarray as suggested by tb@. > >>> > >>> Does this diff also work for you on your AMD xhci? > >>> > >>> > >>> Index: xhci.c > >>> =================================================================== > >>> RCS file: /cvs/src/sys/dev/usb/xhci.c,v > >>> retrieving revision 1.116 > >>> diff -u -p -u -p -r1.116 xhci.c > >>> --- xhci.c 30 Jun 2020 10:21:59 -0000 1.116 > >>> +++ xhci.c 18 Jul 2020 14:20:28 -0000 > >>> @@ -82,7 +82,7 @@ void xhci_event_xfer(struct xhci_softc * > >>> int xhci_event_xfer_generic(struct xhci_softc *, struct > >>> usbd_xfer *, struct xhci_pipe *, uint32_t, int, uint8_t, uint8_t, > >>> uint8_t); int xhci_event_xfer_isoc(struct usbd_xfer *, > >>> struct xhci_pipe *, > >>> - uint32_t, int); > >>> + uint32_t, int, uint8_t); > >>> void xhci_event_command(struct xhci_softc *, uint64_t); > >>> void xhci_event_port_change(struct xhci_softc *, uint64_t, > >>> uint32_t); int xhci_pipe_init(struct xhci_softc *, struct > >>> usbd_pipe *); @@ -800,7 +800,7 @@ xhci_event_xfer(struct xhci_softc > >>> *sc, u return; > >>> break; > >>> case UE_ISOCHRONOUS: > >>> - if (xhci_event_xfer_isoc(xfer, xp, remain, > >>> trb_idx)) > >>> + if (xhci_event_xfer_isoc(xfer, xp, remain, > >>> trb_idx, code)) return; > >>> break; > >>> default: > >>> @@ -927,13 +927,23 @@ xhci_event_xfer_generic(struct xhci_soft > >>> > >>> int > >>> xhci_event_xfer_isoc(struct usbd_xfer *xfer, struct xhci_pipe *xp, > >>> - uint32_t remain, int trb_idx) > >>> + uint32_t remain, int trb_idx, uint8_t code) > >>> { > >>> struct usbd_xfer *skipxfer; > >>> struct xhci_xfer *xx = (struct xhci_xfer *)xfer; > >>> - int trb0_idx, frame_idx = 0; > >>> + int trb0_idx, frame_idx = 0, skip_trb = 0; > >>> > >>> KASSERT(xx->index >= 0); > >>> + > >>> + switch (code) { > >>> + case XHCI_CODE_SHORT_XFER: > >>> + xp->ring.trb_processed[trb_idx] = > >>> TRB_PROCESSED_SHORT; > >>> + break; > >>> + default: > >>> + xp->ring.trb_processed[trb_idx] = > >>> TRB_PROCESSED_YES; > >>> + break; > >>> + } > >>> + > >>> trb0_idx = > >>> ((xx->index + xp->ring.ntrb) - xx->ntrb) % > >>> (xp->ring.ntrb - 1); > >>> @@ -958,15 +968,21 @@ xhci_event_xfer_isoc(struct usbd_xfer *x > >>> trb0_idx = xp->ring.ntrb - 2; > >>> else > >>> trb0_idx = trb_idx - 1; > >>> - if (xfer->frlengths[frame_idx] == 0) { > >>> + if (xp->ring.trb_processed[trb0_idx] == > >>> TRB_PROCESSED_NO) { xfer->frlengths[frame_idx] = > >>> XHCI_TRB_LEN(letoh32( xp->ring.trbs[trb0_idx].trb_status)); > >>> + } else if (xp->ring.trb_processed[trb0_idx] == > >>> + TRB_PROCESSED_SHORT) { > >>> + skip_trb = 1; > >>> } > >>> } > >>> > >>> - xfer->frlengths[frame_idx] += > >>> - > >>> XHCI_TRB_LEN(letoh32(xp->ring.trbs[trb_idx].trb_status)) - remain; > >>> - xfer->actlen += xfer->frlengths[frame_idx]; > >>> + if (!skip_trb) { > >>> + xfer->frlengths[frame_idx] += > >>> + > >>> XHCI_TRB_LEN(letoh32(xp->ring.trbs[trb_idx].trb_status)) - > >>> + remain; > >>> + xfer->actlen += xfer->frlengths[frame_idx]; > >>> + } > >>> > >>> if (xx->index != trb_idx) > >>> return (1); > >>> @@ -1702,6 +1718,15 @@ xhci_ring_alloc(struct xhci_softc *sc, s > >>> > >>> ring->ntrb = ntrb; > >>> > >>> + ring->trb_processed = > >>> + mallocarray(ring->ntrb, sizeof(*ring->trb_processed), > >>> M_DEVBUF, > >>> + M_NOWAIT); > >>> + if (ring->trb_processed == NULL) { > >>> + printf("%s: unable to allocate ring trb_processed > >>> array\n", > >>> + DEVNAME(sc)); > >>> + return (ENOMEM); > >>> + } > >>> + > >>> xhci_ring_reset(sc, ring); > >>> > >>> return (0); > >>> @@ -1710,6 +1735,8 @@ xhci_ring_alloc(struct xhci_softc *sc, s > >>> void > >>> xhci_ring_free(struct xhci_softc *sc, struct xhci_ring *ring) > >>> { > >>> + free(ring->trb_processed, M_DEVBUF, > >>> + ring->ntrb * sizeof(*ring->trb_processed)); > >>> usbd_dma_contig_free(&sc->sc_bus, &ring->dma); > >>> } > >>> > >>> @@ -1721,6 +1748,8 @@ xhci_ring_reset(struct xhci_softc *sc, s > >>> size = ring->ntrb * sizeof(struct xhci_trb); > >>> > >>> memset(ring->trbs, 0, size); > >>> + memset(ring->trb_processed, 0, > >>> + ring->ntrb * sizeof(*ring->trb_processed)); > >>> > >>> ring->index = 0; > >>> ring->toggle = XHCI_TRB_CYCLE; > >>> @@ -1805,6 +1834,8 @@ xhci_ring_produce(struct xhci_softc *sc, > >>> ring->index = 0; > >>> ring->toggle ^= 1; > >>> } > >>> + > >>> + ring->trb_processed[(trb - &ring->trbs[0])] = > >>> TRB_PROCESSED_NO; > >>> return (trb); > >>> } > >>> Index: xhcivar.h > >>> =================================================================== > >>> RCS file: /cvs/src/sys/dev/usb/xhcivar.h,v > >>> retrieving revision 1.11 > >>> diff -u -p -u -p -r1.11 xhcivar.h > >>> --- xhcivar.h 6 Oct 2019 17:30:00 -0000 1.11 > >>> +++ xhcivar.h 18 Jul 2020 14:20:28 -0000 > >>> @@ -44,6 +44,12 @@ struct xhci_xfer { > >>> > >>> struct xhci_ring { > >>> struct xhci_trb *trbs; > >>> +enum { > >>> + TRB_PROCESSED_NO, > >>> + TRB_PROCESSED_YES, > >>> + TRB_PROCESSED_SHORT > >>> +}; > >>> + uint8_t *trb_processed; > >>> size_t ntrb; > >>> struct usbd_dma_info dma; > >>> > >>> >
