Re: umb: aggregate packets on tx
On Sat, Apr 15, 2017 at 11:21:41PM +0200, Alexander Bluhm wrote: > On Mon, Feb 20, 2017 at 04:35:10PM +0100, Gerhard Roth wrote: > > On Mon, 12 Dec 2016 14:50:50 +0100 Gerhard Roth > > wrote: > > > The current umb(4) implementation needs one USB transfer for every packet > > > that is sent. With the following patch, we can now aggregate several > > > packets from the ifq into one single USB transfer. > > > > > > This may speed up the tx path. And even if it doesn't, at least it > > > reduces the number of transfers required. > > I am running with this for two days now. Works fine. > > umb0 at uhub0 port 4 configuration 1 interface 6 "Lenovo H5321 gw" rev > 2.00/0.00 addr 2 > > Diff looks reasonable. OK bluhm@ Yes, the diff makes sense. OK by me as well. Sent over umb0 at uhub0 port 4 configuration 1 interface 0 "Sierra Wireless Inc. Sierra Wireless EM7345 4G LTE" rev 2.00/17.29 addr 7 with patch applied. Thanks!
Re: umb: aggregate packets on tx
On Mon, Feb 20, 2017 at 04:35:10PM +0100, Gerhard Roth wrote: > On Mon, 12 Dec 2016 14:50:50 +0100 Gerhard Roth wrote: > > The current umb(4) implementation needs one USB transfer for every packet > > that is sent. With the following patch, we can now aggregate several > > packets from the ifq into one single USB transfer. > > > > This may speed up the tx path. And even if it doesn't, at least it > > reduces the number of transfers required. I am running with this for two days now. Works fine. umb0 at uhub0 port 4 configuration 1 interface 6 "Lenovo H5321 gw" rev 2.00/0.00 addr 2 Diff looks reasonable. OK bluhm@
Re: umb: aggregate packets on tx
On Mon, 12 Dec 2016 14:50:50 +0100 Gerhard Roth wrote: > The current umb(4) implementation needs one USB transfer for every packet > that is sent. With the following patch, we can now aggregate several > packets from the ifq into one single USB transfer. > > This may speed up the tx path. And even if it doesn't, at least it > reduces the number of transfers required. > > > Gerhard > Ping. Anyone willing to ok this? (Patch below updated to match current). Gerhard Index: sys/dev/usb/if_umb.c === RCS file: /cvs/src/sys/dev/usb/if_umb.c,v retrieving revision 1.9 diff -u -p -u -p -r1.9 if_umb.c --- sys/dev/usb/if_umb.c 22 Jan 2017 10:17:39 - 1.9 +++ sys/dev/usb/if_umb.c 20 Feb 2017 07:44:40 - @@ -156,7 +156,7 @@ int umb_decode_connect_info(struct umb int umb_decode_ip_configuration(struct umb_softc *, void *, int); void umb_rx(struct umb_softc *); void umb_rxeof(struct usbd_xfer *, void *, usbd_status); -int umb_encap(struct umb_softc *, struct mbuf *); +int umb_encap(struct umb_softc *); void umb_txeof(struct usbd_xfer *, void *, usbd_status); void umb_decap(struct umb_softc *, struct usbd_xfer *); @@ -299,6 +299,7 @@ umb_attach(struct device *parent, struct sc->sc_udev = uaa->device; sc->sc_ctrl_ifaceno = uaa->ifaceno; + ml_init(&sc->sc_tx_ml); /* * Some MBIM hardware does not provide the mandatory CDC Union @@ -583,8 +584,25 @@ umb_ncm_setup(struct umb_softc *sc) UGETW(np.wLength) == sizeof (np)) { sc->sc_rx_bufsz = UGETDW(np.dwNtbInMaxSize); sc->sc_tx_bufsz = UGETDW(np.dwNtbOutMaxSize); - } else + sc->sc_maxdgram = UGETW(np.wNtbOutMaxDatagrams); + sc->sc_align = UGETW(np.wNdpOutAlignment); + sc->sc_ndp_div = UGETW(np.wNdpOutDivisor); + sc->sc_ndp_remainder = UGETW(np.wNdpOutPayloadRemainder); + /* Validate values */ + if (!powerof2(sc->sc_align) || sc->sc_align == 0 || + sc->sc_align >= sc->sc_tx_bufsz) + sc->sc_align = sizeof (uint32_t); + if (!powerof2(sc->sc_ndp_div) || sc->sc_ndp_div == 0 || + sc->sc_ndp_div >= sc->sc_tx_bufsz) + sc->sc_ndp_div = sizeof (uint32_t); + if
(sc->sc_ndp_remainder >= sc->sc_ndp_div) + sc->sc_ndp_remainder = 0; + } else { sc->sc_rx_bufsz = sc->sc_tx_bufsz = 8 * 1024; + sc->sc_maxdgram = 0; + sc->sc_align = sc->sc_ndp_div = sizeof (uint32_t); + sc->sc_ndp_remainder = 0; + } } int @@ -593,12 +611,12 @@ umb_alloc_xfers(struct umb_softc *sc) if (!sc->sc_rx_xfer) { if ((sc->sc_rx_xfer = usbd_alloc_xfer(sc->sc_udev)) != NULL) sc->sc_rx_buf = usbd_alloc_buffer(sc->sc_rx_xfer, - sc->sc_rx_bufsz + MBIM_HDR32_LEN); + sc->sc_rx_bufsz); } if (!sc->sc_tx_xfer) { if ((sc->sc_tx_xfer = usbd_alloc_xfer(sc->sc_udev)) != NULL) sc->sc_tx_buf = usbd_alloc_buffer(sc->sc_tx_xfer, - sc->sc_tx_bufsz + MBIM_HDR16_LEN); + sc->sc_tx_bufsz); } return (sc->sc_rx_buf && sc->sc_tx_buf) ? 1 : 0; } @@ -617,10 +635,7 @@ umb_free_xfers(struct umb_softc *sc) sc->sc_tx_xfer = NULL; sc->sc_tx_buf = NULL; } - if (sc->sc_tx_m) { - m_freem(sc->sc_tx_m); - sc->sc_tx_m = NULL; - } + ml_purge(&sc->sc_tx_ml); } int @@ -792,35 +807,91 @@ umb_input(struct ifnet *ifp, struct mbuf return 1; } +static inline int +umb_align(size_t bufsz, int offs, int alignment, int remainder) +{ + size_t m = alignment - 1; + int align; + + align = (((size_t)offs + m) & ~m) - alignment + remainder; + if (align < offs) + align += alignment; + if (align > bufsz) + align = bufsz; + return align - offs; +} + +static inline int +umb_padding(void *buf, size_t bufsz, int offs, int alignment, int remainder) +{ + int nb; + + nb = umb_align(bufsz, offs, alignment, remainder); + if (nb > 0) + memset(buf + offs, 0, nb); + return nb; +} + void umb_start(struct ifnet *ifp) { struct umb_softc *sc = ifp->if_softc; - struct mbuf *m_head = NULL; + struct mbuf *m = NULL; + int ndgram = 0; + int offs, plen, len, mlen; + int maxalign; if (usbd_is_dying(sc->sc_udev) || !(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd)) return; -
Re: umb: aggregate packets on tx
On Mon, Dec 12, 2016 at 02:50:50PM +0100, Gerhard Roth wrote: > The current umb(4) implementation needs one USB transfer for every packet > that is sent. With the following patch, we can now aggregate several > packets from the ifq into one single USB transfer. > > This may speed up the tx path. And even if it doesn't, at least it > reduces the number of transfers required. I applied this diff this morning and everything has been working smoothly so far. There are no obvious regressions that I have noticed. This is on an X1 Carbon (4th Gen) with an EM7455 just like the X260 has. Bryan
umb: aggregate packets on tx
The current umb(4) implementation needs one USB transfer for every packet that is sent. With the following patch, we can now aggregate several packets from the ifq into one single USB transfer. This may speed up the tx path. And even if it doesn't, at least it reduces the number of transfers required. Gerhard Index: sys/dev/usb/if_umb.c === RCS file: /cvs/src/sys/dev/usb/if_umb.c,v retrieving revision 1.8 diff -u -p -u -p -r1.8 if_umb.c --- sys/dev/usb/if_umb.c 25 Nov 2016 12:43:26 - 1.8 +++ sys/dev/usb/if_umb.c 12 Dec 2016 13:38:34 - @@ -156,7 +156,7 @@ int umb_decode_connect_info(struct umb int umb_decode_ip_configuration(struct umb_softc *, void *, int); void umb_rx(struct umb_softc *); void umb_rxeof(struct usbd_xfer *, void *, usbd_status); -int umb_encap(struct umb_softc *, struct mbuf *); +int umb_encap(struct umb_softc *); void umb_txeof(struct usbd_xfer *, void *, usbd_status); void umb_decap(struct umb_softc *, struct usbd_xfer *); @@ -299,6 +299,7 @@ umb_attach(struct device *parent, struct sc->sc_udev = uaa->device; sc->sc_ctrl_ifaceno = uaa->ifaceno; + ml_init(&sc->sc_tx_ml); /* * Some MBIM hardware does not provide the mandatory CDC Union @@ -583,8 +584,25 @@ umb_ncm_setup(struct umb_softc *sc) UGETW(np.wLength) == sizeof (np)) { sc->sc_rx_bufsz = UGETDW(np.dwNtbInMaxSize); sc->sc_tx_bufsz = UGETDW(np.dwNtbOutMaxSize); - } else + sc->sc_maxdgram = UGETW(np.wNtbOutMaxDatagrams); + sc->sc_align = UGETW(np.wNdpOutAlignment); + sc->sc_ndp_div = UGETW(np.wNdpOutDivisor); + sc->sc_ndp_remainder = UGETW(np.wNdpOutPayloadRemainder); + /* Validate values */ + if (!powerof2(sc->sc_align) || sc->sc_align == 0 || + sc->sc_align >= sc->sc_tx_bufsz) + sc->sc_align = sizeof (uint32_t); + if (!powerof2(sc->sc_ndp_div) || sc->sc_ndp_div == 0 || + sc->sc_ndp_div >= sc->sc_tx_bufsz) + sc->sc_ndp_div = sizeof (uint32_t); + if (sc->sc_ndp_remainder >= sc->sc_ndp_div) + sc->sc_ndp_remainder = 0; + } else { sc->sc_rx_bufsz = sc->sc_tx_bufsz = 8 * 1024; + sc->sc_maxdgram = 0; +
sc->sc_align = sc->sc_ndp_div = sizeof (uint32_t); + sc->sc_ndp_remainder = 0; + } } int @@ -593,12 +611,12 @@ umb_alloc_xfers(struct umb_softc *sc) if (!sc->sc_rx_xfer) { if ((sc->sc_rx_xfer = usbd_alloc_xfer(sc->sc_udev)) != NULL) sc->sc_rx_buf = usbd_alloc_buffer(sc->sc_rx_xfer, - sc->sc_rx_bufsz + MBIM_HDR32_LEN); + sc->sc_rx_bufsz); } if (!sc->sc_tx_xfer) { if ((sc->sc_tx_xfer = usbd_alloc_xfer(sc->sc_udev)) != NULL) sc->sc_tx_buf = usbd_alloc_buffer(sc->sc_tx_xfer, - sc->sc_tx_bufsz + MBIM_HDR16_LEN); + sc->sc_tx_bufsz); } return (sc->sc_rx_buf && sc->sc_tx_buf) ? 1 : 0; } @@ -617,10 +635,7 @@ umb_free_xfers(struct umb_softc *sc) sc->sc_tx_xfer = NULL; sc->sc_tx_buf = NULL; } - if (sc->sc_tx_m) { - m_freem(sc->sc_tx_m); - sc->sc_tx_m = NULL; - } + ml_purge(&sc->sc_tx_ml); } int @@ -792,35 +807,91 @@ umb_input(struct ifnet *ifp, struct mbuf return 1; } +static inline int +umb_align(size_t bufsz, int offs, int alignment, int remainder) +{ + size_t m = alignment - 1; + int align; + + align = (((size_t)offs + m) & ~m) - alignment + remainder; + if (align < offs) + align += alignment; + if (align > bufsz) + align = bufsz; + return align - offs; +} + +static inline int +umb_padding(void *buf, size_t bufsz, int offs, int alignment, int remainder) +{ + int nb; + + nb = umb_align(bufsz, offs, alignment, remainder); + if (nb > 0) + memset(buf + offs, 0, nb); + return nb; +} + void umb_start(struct ifnet *ifp) { struct umb_softc *sc = ifp->if_softc; - struct mbuf *m_head = NULL; + struct mbuf *m = NULL; + int ndgram = 0; + int offs, plen, len, mlen; + int maxalign; if (usbd_is_dying(sc->sc_udev) || !(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd)) return; - m_head = ifq_deq_begin(&ifp->if_snd); - if (m_head == NULL) - return; + KASSERT(ml_empty(&sc->sc_tx_ml)); - if (!umb_encap(sc, m_head)) { -