On 2020/07/19 11:25, Marcus Glocker wrote: > On Sun, 19 Jul 2020 02:25:30 +0000 > [email protected] wrote: > >> hi, >> >> It works on AMD Bolton xHCI (78141022), Intel PCH (1e318086), >> and ASM1042 (10421b21). >> I simply play with ffplay -f v4l2 /dev/video0 to test. > > If your cam supports MJPEG it's good to add '-input_format mjpeg' with > higher resolutions like 1280x720, because that will generated varying > image sizes, which hit the 64k memory boundary more often, and thus > generate potentially more chained TDs.
Thank you for useful information. My webcam supprots at most 640x480, but 1024 bytes/frame x (2+1) maxburst x 40 frames = 122880 bytes/xfer is enough to observe TD fragmentation. >> At this moment it does not work on VL805, but I have no idea. >> I'll investigate furthermore... > > Oh dear :-) - Ok. > >> BTW I think xhci_ring_alloc should free ring dma buffer >> if trb_processed[] allocation fails. > > Ah right, I'll add the usbd_dma_contig_free() there - Thanks. > >> >> On 2020/07/18 14:47, Marcus Glocker wrote: >>> Hey, >>> >>> On Sat, 18 Jul 2020 00:14:32 +0000 >>> [email protected] wrote: >>> >>>> hi, >>>> >>>> On 2020/07/15 21:28, [email protected] wrote: >>>>> hi, >>>>> >>>>> The patch works well on Intel PCH xhci, but not on AMD Bolton >>>>> xHCI. I'll investigate this problem. >>>> >>>> Bolton xHCI sometimes raises following events. >>>> (I added printf Completion Code.) >>>> >>>> #246 cc 13 remain 0 type 5 origlen 2048 frlengths[6] 2048 >>>> #247 cc 1 remain 0 type 1 origlen 1024 frlengths[6] 3072 >>>> >>>> It seems this behavior violates the spec. 4.10.1.1. >>>> When the remain of 1. TRB is 0, CC should be SUCCESS, not >>>> SHORT_PKT, and HC should not raise interrupt. >>>> The problem is how many length 2. TRB transferred. >>>> If real CC of 1. TRB = SUCCESS, we should add 2. TRB length to >>>> frlengths. If SHORT_PKT, we should keep frlengths as is. >>>> Actually with the latter assumption I can see video smoothly w/o >>>> errors. >>>> >>>> I changed your patch to skip adding to frlengths and actlen >>>> if previous TRB is processed AND remain == 0. >>> >>> Interesting! Thanks for tracing this down. >>> So in general we can say that for a chained TD, if the first TRB is >>> SHORT, we can simply skip the second TRB. >>> >>> To make the code a bit more understandable, and use the new >>> trb_processed flag to control this, I made another diff which keeps >>> track whether the TRB was not processed, processed, or processed >>> short. >>> >>> It also includes the malloc NULL check and replaces malloc by >>> mallocarray as suggested by tb@. >>> >>> Does this diff also work for you on your AMD xhci? >>> >>> >>> Index: xhci.c >>> =================================================================== >>> RCS file: /cvs/src/sys/dev/usb/xhci.c,v >>> retrieving revision 1.116 >>> diff -u -p -u -p -r1.116 xhci.c >>> --- xhci.c 30 Jun 2020 10:21:59 -0000 1.116 >>> +++ xhci.c 18 Jul 2020 14:20:28 -0000 >>> @@ -82,7 +82,7 @@ void xhci_event_xfer(struct xhci_softc * >>> int xhci_event_xfer_generic(struct xhci_softc *, struct >>> usbd_xfer *, struct xhci_pipe *, uint32_t, int, uint8_t, uint8_t, >>> uint8_t); int xhci_event_xfer_isoc(struct usbd_xfer *, >>> struct xhci_pipe *, >>> - uint32_t, int); >>> + uint32_t, int, uint8_t); >>> void xhci_event_command(struct xhci_softc *, uint64_t); >>> void xhci_event_port_change(struct xhci_softc *, uint64_t, >>> uint32_t); int xhci_pipe_init(struct xhci_softc *, struct >>> usbd_pipe *); @@ -800,7 +800,7 @@ xhci_event_xfer(struct xhci_softc >>> *sc, u return; >>> break; >>> case UE_ISOCHRONOUS: >>> - if (xhci_event_xfer_isoc(xfer, xp, remain, >>> trb_idx)) >>> + if (xhci_event_xfer_isoc(xfer, xp, remain, >>> trb_idx, code)) return; >>> break; >>> default: >>> @@ -927,13 +927,23 @@ xhci_event_xfer_generic(struct xhci_soft >>> >>> int >>> xhci_event_xfer_isoc(struct usbd_xfer *xfer, struct xhci_pipe *xp, >>> - uint32_t remain, int trb_idx) >>> + uint32_t remain, int trb_idx, uint8_t code) >>> { >>> struct usbd_xfer *skipxfer; >>> struct xhci_xfer *xx = (struct xhci_xfer *)xfer; >>> - int trb0_idx, frame_idx = 0; >>> + int trb0_idx, frame_idx = 0, skip_trb = 0; >>> >>> KASSERT(xx->index >= 0); >>> + >>> + switch (code) { >>> + case XHCI_CODE_SHORT_XFER: >>> + xp->ring.trb_processed[trb_idx] = >>> TRB_PROCESSED_SHORT; >>> + break; >>> + default: >>> + xp->ring.trb_processed[trb_idx] = >>> TRB_PROCESSED_YES; >>> + break; >>> + } >>> + >>> trb0_idx = >>> ((xx->index + xp->ring.ntrb) - xx->ntrb) % >>> (xp->ring.ntrb - 1); >>> @@ -958,15 +968,21 @@ xhci_event_xfer_isoc(struct usbd_xfer *x >>> trb0_idx = xp->ring.ntrb - 2; >>> else >>> trb0_idx = trb_idx - 1; >>> - if (xfer->frlengths[frame_idx] == 0) { >>> + if (xp->ring.trb_processed[trb0_idx] == >>> TRB_PROCESSED_NO) { xfer->frlengths[frame_idx] = >>> XHCI_TRB_LEN(letoh32( xp->ring.trbs[trb0_idx].trb_status)); >>> + } else if (xp->ring.trb_processed[trb0_idx] == >>> + TRB_PROCESSED_SHORT) { >>> + skip_trb = 1; >>> } >>> } >>> >>> - xfer->frlengths[frame_idx] += >>> - >>> XHCI_TRB_LEN(letoh32(xp->ring.trbs[trb_idx].trb_status)) - remain; >>> - xfer->actlen += xfer->frlengths[frame_idx]; >>> + if (!skip_trb) { >>> + xfer->frlengths[frame_idx] += >>> + >>> XHCI_TRB_LEN(letoh32(xp->ring.trbs[trb_idx].trb_status)) - >>> + remain; >>> + xfer->actlen += xfer->frlengths[frame_idx]; >>> + } >>> >>> if (xx->index != trb_idx) >>> return (1); >>> @@ -1702,6 +1718,15 @@ xhci_ring_alloc(struct xhci_softc *sc, s >>> >>> ring->ntrb = ntrb; >>> >>> + ring->trb_processed = >>> + mallocarray(ring->ntrb, sizeof(*ring->trb_processed), >>> M_DEVBUF, >>> + M_NOWAIT); >>> + if (ring->trb_processed == NULL) { >>> + printf("%s: unable to allocate ring trb_processed >>> array\n", >>> + DEVNAME(sc)); >>> + return (ENOMEM); >>> + } >>> + >>> xhci_ring_reset(sc, ring); >>> >>> return (0); >>> @@ -1710,6 +1735,8 @@ xhci_ring_alloc(struct xhci_softc *sc, s >>> void >>> xhci_ring_free(struct xhci_softc *sc, struct xhci_ring *ring) >>> { >>> + free(ring->trb_processed, M_DEVBUF, >>> + ring->ntrb * sizeof(*ring->trb_processed)); >>> usbd_dma_contig_free(&sc->sc_bus, &ring->dma); >>> } >>> >>> @@ -1721,6 +1748,8 @@ xhci_ring_reset(struct xhci_softc *sc, s >>> size = ring->ntrb * sizeof(struct xhci_trb); >>> >>> memset(ring->trbs, 0, size); >>> + memset(ring->trb_processed, 0, >>> + ring->ntrb * sizeof(*ring->trb_processed)); >>> >>> ring->index = 0; >>> ring->toggle = XHCI_TRB_CYCLE; >>> @@ -1805,6 +1834,8 @@ xhci_ring_produce(struct xhci_softc *sc, >>> ring->index = 0; >>> ring->toggle ^= 1; >>> } >>> + >>> + ring->trb_processed[(trb - &ring->trbs[0])] = >>> TRB_PROCESSED_NO; >>> return (trb); >>> } >>> Index: xhcivar.h >>> =================================================================== >>> RCS file: /cvs/src/sys/dev/usb/xhcivar.h,v >>> retrieving revision 1.11 >>> diff -u -p -u -p -r1.11 xhcivar.h >>> --- xhcivar.h 6 Oct 2019 17:30:00 -0000 1.11 >>> +++ xhcivar.h 18 Jul 2020 14:20:28 -0000 >>> @@ -44,6 +44,12 @@ struct xhci_xfer { >>> >>> struct xhci_ring { >>> struct xhci_trb *trbs; >>> +enum { >>> + TRB_PROCESSED_NO, >>> + TRB_PROCESSED_YES, >>> + TRB_PROCESSED_SHORT >>> +}; >>> + uint8_t *trb_processed; >>> size_t ntrb; >>> struct usbd_dma_info dma; >>> >>>
