Thanks for the report, Richard,

On 02/11/15(Mon) 10:44, Richard Procter wrote:
> 
> >Synopsis:    tsec(4) tx hangs, flagged OACTIVE
> >Category:    kernel
> >Environment:
>         System      : OpenBSD 5.8
>         Details     : OpenBSD 5.8-current (GENERIC) #105: Mon Sep  7 08:21:16 
> MDT 2015
>                          
> dera...@socppc.openbsd.org:/usr/src/sys/arch/socppc/compile/GENERIC
> 
>       Architecture: OpenBSD.socppc
>       Machine     : socppc
> >Description:
> 
> After at most 15 minutes or so of the loop below the interface tx 
> goes out to lunch and never returns. ifconfig shows it is 
> flagged OACTIVE. The watchdog timeout never appears in dmesg. 
> tcpdump sees rx but no tx. 
> 
> >How-To-Repeat:
>       # while true; do ls -lR /; done
> 
> >Fix:
>       # ifconfig tsec${x} down && ifconfig tsec${x} up
> 
> The hang looks due to IFF_OACTIVE being set on error return 
> from tsec_encap():
> 
> if_tsec.c: tsec_start()
>   534                  if (tsec_encap(sc, m, &idx)) {
>   535                          ifp->if_flags |= IFF_OACTIVE;
>   536                          break;
>   537                  }
> 
> Although the flag is unset by tsec_tx_proc() this won't occur if
> there are no pending tx (nor will the watchdog be running). Progress
> therefore halts as tsec_start() returns immediately on IFF_OACTIVE.
> 
> The root problem is the driver cannot process an arbitrary number of 
> mbuf fragments as they must be mapped for DMA, and these DMA maps are 
> statically allocated. tsec_start() then mistakes that error return 
> and halts tx processing.
> 
> This problem exists in other drivers but is masked by their 
> higher upper limit on mbuf fragments. e.g. vge(4), and also 
> sis(4), em(4) but only as a theoretical concern.
> 
> Two alternative fixes appear below. The first assumes the stack 
> in practice issues fewer than 32 fragments. The second doesn't. 
> Both tested on RB600A. 

I'm ok with the second diff if you remove the printf().

> 
> best, 
> Richard. 
> 
> Index: if_tsec.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/socppc/dev/if_tsec.c,v
> retrieving revision 1.38
> diff -u -p -u -r1.38 if_tsec.c
> --- if_tsec.c 25 Oct 2015 13:22:09 -0000      1.38
> +++ if_tsec.c 1 Nov 2015 20:46:41 -0000
> @@ -227,7 +227,7 @@ struct tsec_buf {
>  };
>  
>  #define TSEC_NTXDESC 256
> -#define TSEC_NTXSEGS 16
> +#define TSEC_NTXSEGS 32
>  
>  #define TSEC_NRXDESC 256
>  
> @@ -1131,8 +1131,10 @@ tsec_encap(struct tsec_softc *sc, struct
>       cur = frag = *idx;
>       map = sc->sc_txbuf[cur].tb_map;
>  
> -     if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT))
> -             return (ENOBUFS);
> +     if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
> +             printf("%s: can't map mbuf\n", sc->sc_dev.dv_xname);
> +             return (ENOBUFS); /* XXX tx will go out to lunch */
> +     }
>  
>       if (map->dm_nsegs > (TSEC_NTXDESC - sc->sc_tx_cnt - 2)) {
>               bus_dmamap_unload(sc->sc_dmat, map);
> 
> Index: if_tsec.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/socppc/dev/if_tsec.c,v
> retrieving revision 1.38
> diff -u -p -u -r1.38 if_tsec.c
> --- if_tsec.c 25 Oct 2015 13:22:09 -0000      1.38
> +++ if_tsec.c 1 Nov 2015 20:44:24 -0000
> @@ -514,7 +514,7 @@ tsec_start(struct ifnet *ifp)
>  {
>       struct tsec_softc *sc = ifp->if_softc;
>       struct mbuf *m;
> -     int idx;
> +     int error, idx;
>  
>       if (!(ifp->if_flags & IFF_RUNNING))
>               return;
> @@ -531,9 +531,16 @@ tsec_start(struct ifnet *ifp)
>               if (m == NULL)
>                       break;
>  
> -             if (tsec_encap(sc, m, &idx)) {
> +             error = tsec_encap(sc, m, &idx);
> +             if (error == ENOBUFS) {
>                       ifp->if_flags |= IFF_OACTIVE;
>                       break;
> +             } 
> +             if (error == EFBIG) {
> +                     IFQ_DEQUEUE(&ifp->if_snd, m);
> +                     m_freem(m); /* give up: drop it */
> +                     ifp->if_oerrors++;
> +                     continue;
>               }
>  
>               /* Now we are committed to transmit the packet. */
> @@ -1131,8 +1138,14 @@ tsec_encap(struct tsec_softc *sc, struct
>       cur = frag = *idx;
>       map = sc->sc_txbuf[cur].tb_map;
>  
> -     if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT))
> -             return (ENOBUFS);
> +     if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
> +             if (m_defrag(m, M_DONTWAIT))
> +                     return (EFBIG);
> +             if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
> +                     printf("%s: can't map mbuf\n", sc->sc_dev.dv_xname);
> +                     return (EFBIG);
> +             }
> +     }
>  
>       if (map->dm_nsegs > (TSEC_NTXDESC - sc->sc_tx_cnt - 2)) {
>               bus_dmamap_unload(sc->sc_dmat, map);
> 
> 
> OpenBSD 5.8-current (GENERIC) #105: Mon Sep  7 08:21:16 MDT 2015
>     dera...@socppc.openbsd.org:/usr/src/sys/arch/socppc/compile/GENERIC
> real mem = 134217728 (128MB)
> avail mem = 123711488 (117MB)
> mainbus0 at root
> cpu0 at mainbus0: 8347
> obio0 at mainbus0
> "wdt" at obio0 offset 0x00200 not configured
> com0 at obio0 offset 0x04500 ivec 9: ns16550a, 16 byte fifo
> com0: console
> socpcic0 at obio0 offset 0x08500
> pci0 at socpcic0 bus 0
> "Freescale MPC8343" rev 0x30 at pci0 dev 0 function 0 not configured
> vge0 at pci0 dev 11 function 0 "VIA VT612x" rev 0x11: ivec 21, address 
> 00:0c:42:20:4e:aa
> ciphy0 at vge0 phy 1: CS8201 10/100/1000TX PHY, rev. 2
> ipic0 at obio0 offset 0x00700
> "mdio" at obio0 offset 0x24520 not configured
> tsec0 at obio0 offset 0x25000 ivec 35: address 00:0c:42:20:4e:a8
> rgephy0 at tsec0 phy 0: RTL8169S/8110S/8211 PHY, rev. 2
> tsec1 at obio0 offset 0x24000 ivec 32: address 00:0c:42:20:4e:a9
> rgephy1 at tsec1 phy 1: RTL8169S/8110S/8211 PHY, rev. 2
> "gpio" at obio0 offset 0x00c08 not configured
> "beeper" at obio0 offset 0x00500 not configured
> "led" at obio0 not configured
> "fancon" at mainbus0 not configured
> "nand" at mainbus0 not configured
> "nnand" at mainbus0 not configured
> "flash" at mainbus0 not configured
> wdc0 at mainbus0
> wd0 at wdc0 channel 0 drive 0: <SanDisk SDCFH2-4096>
> wd0: 4-sector PIO, LBA, 3919MB, 8027712 sectors
> wd0(wdc0:0:0): using BIOS timings
> wdc1 at mainbus0
> vscsi0 at root
> scsibus0 at vscsi0: 256 targets
> softraid0 at root
> scsibus1 at softraid0: 256 targets
> root on wd0a (84ccd8ef8549b483.a) swap on wd0b dump on wd0b
> WARNING: unable to get date/time -- CHECK AND RESET THE DATE!
> 
> usbdevs:
> usbdevs: no USB controllers found
> 
> 

Reply via email to