Re: umb: aggregate packets on tx

2017-04-16 Thread Stefan Sperling
On Sat, Apr 15, 2017 at 11:21:41PM +0200, Alexander Bluhm wrote:
> On Mon, Feb 20, 2017 at 04:35:10PM +0100, Gerhard Roth wrote:
> > On Mon, 12 Dec 2016 14:50:50 +0100 Gerhard Roth  
> > wrote:
> > > The current umb(4) implementation needs one USB transfer for every packet
> > > that is sent. With the following patch, we can now aggregate several
> > > packets from the ifq into one single USB transfer.
> > > 
> > > This may speed up the tx path. And even if it doesn't, at least it
> > > reduces the number of transfers required.
> 
> I am running with this for two days now.  Works fine.
> 
> umb0 at uhub0 port 4 configuration 1 interface 6 "Lenovo H5321 gw" rev 
> 2.00/0.00 addr 2
> 
> Diff looks reasonable.  OK bluhm@

Yes, the diff makes sense. OK by me as well.

Sent over 
umb0 at uhub0 port 4 configuration 1 interface 0 "Sierra Wireless Inc. Sierra 
Wireless EM7345 4G LTE" rev 2.00/17.29 addr 7
with patch applied.

Thanks!



Re: umb: aggregate packets on tx

2017-04-15 Thread Alexander Bluhm
On Mon, Feb 20, 2017 at 04:35:10PM +0100, Gerhard Roth wrote:
> On Mon, 12 Dec 2016 14:50:50 +0100 Gerhard Roth  wrote:
> > The current umb(4) implementation needs one USB transfer for every packet
> > that is sent. With the following patch, we can now aggregate several
> > packets from the ifq into one single USB transfer.
> > 
> > This may speed up the tx path. And even if it doesn't, at least it
> > reduces the number of transfers required.

I am running with this for two days now.  Works fine.

umb0 at uhub0 port 4 configuration 1 interface 6 "Lenovo H5321 gw" rev 
2.00/0.00 addr 2

Diff looks reasonable.  OK bluhm@



Re: umb: aggregate packets on tx

2017-02-20 Thread Gerhard Roth
On Mon, 12 Dec 2016 14:50:50 +0100 Gerhard Roth  wrote:
> The current umb(4) implementation needs one USB transfer for every packet
> that is sent. With the following patch, we can now aggregate several
> packets from the ifq into one single USB transfer.
> 
> This may speed up the tx path. And even if it doesn't, at least it
> reduces the number of transfers required.
> 
> 
> Gerhard
> 

Ping.

Anyone willing to ok this?

(Patch below updated to match current).


Gerhard


Index: sys/dev/usb/if_umb.c
===
RCS file: /cvs/src/sys/dev/usb/if_umb.c,v
retrieving revision 1.9
diff -u -p -u -p -r1.9 if_umb.c
--- sys/dev/usb/if_umb.c22 Jan 2017 10:17:39 -  1.9
+++ sys/dev/usb/if_umb.c20 Feb 2017 07:44:40 -
@@ -156,7 +156,7 @@ int  umb_decode_connect_info(struct umb
 int umb_decode_ip_configuration(struct umb_softc *, void *, int);
 voidumb_rx(struct umb_softc *);
 voidumb_rxeof(struct usbd_xfer *, void *, usbd_status);
-int umb_encap(struct umb_softc *, struct mbuf *);
+int umb_encap(struct umb_softc *);
 voidumb_txeof(struct usbd_xfer *, void *, usbd_status);
 voidumb_decap(struct umb_softc *, struct usbd_xfer *);
 
@@ -299,6 +299,7 @@ umb_attach(struct device *parent, struct
 
sc->sc_udev = uaa->device;
sc->sc_ctrl_ifaceno = uaa->ifaceno;
+   ml_init(&sc->sc_tx_ml);
 
/*
 * Some MBIM hardware does not provide the mandatory CDC Union
@@ -583,8 +584,25 @@ umb_ncm_setup(struct umb_softc *sc)
UGETW(np.wLength) == sizeof (np)) {
sc->sc_rx_bufsz = UGETDW(np.dwNtbInMaxSize);
sc->sc_tx_bufsz = UGETDW(np.dwNtbOutMaxSize);
-   } else
+   sc->sc_maxdgram = UGETW(np.wNtbOutMaxDatagrams);
+   sc->sc_align = UGETW(np.wNdpOutAlignment);
+   sc->sc_ndp_div = UGETW(np.wNdpOutDivisor);
+   sc->sc_ndp_remainder = UGETW(np.wNdpOutPayloadRemainder);
+   /* Validate values */
+   if (!powerof2(sc->sc_align) || sc->sc_align == 0 ||
+   sc->sc_align >= sc->sc_tx_bufsz)
+   sc->sc_align = sizeof (uint32_t);
+   if (!powerof2(sc->sc_ndp_div) || sc->sc_ndp_div == 0 ||
+   sc->sc_ndp_div >= sc->sc_tx_bufsz)
+   sc->sc_ndp_div = sizeof (uint32_t);
+   if (sc->sc_ndp_remainder >= sc->sc_ndp_div)
+   sc->sc_ndp_remainder = 0;
+   } else {
sc->sc_rx_bufsz = sc->sc_tx_bufsz = 8 * 1024;
+   sc->sc_maxdgram = 0;
+   sc->sc_align = sc->sc_ndp_div = sizeof (uint32_t);
+   sc->sc_ndp_remainder = 0;
+   }
 }
 
 int
@@ -593,12 +611,12 @@ umb_alloc_xfers(struct umb_softc *sc)
if (!sc->sc_rx_xfer) {
if ((sc->sc_rx_xfer = usbd_alloc_xfer(sc->sc_udev)) != NULL)
sc->sc_rx_buf = usbd_alloc_buffer(sc->sc_rx_xfer,
-   sc->sc_rx_bufsz + MBIM_HDR32_LEN);
+   sc->sc_rx_bufsz);
}
if (!sc->sc_tx_xfer) {
if ((sc->sc_tx_xfer = usbd_alloc_xfer(sc->sc_udev)) != NULL)
sc->sc_tx_buf = usbd_alloc_buffer(sc->sc_tx_xfer,
-   sc->sc_tx_bufsz + MBIM_HDR16_LEN);
+   sc->sc_tx_bufsz);
}
return (sc->sc_rx_buf && sc->sc_tx_buf) ? 1 : 0;
 }
@@ -617,10 +635,7 @@ umb_free_xfers(struct umb_softc *sc)
sc->sc_tx_xfer = NULL;
sc->sc_tx_buf = NULL;
}
-   if (sc->sc_tx_m) {
-   m_freem(sc->sc_tx_m);
-   sc->sc_tx_m = NULL;
-   }
+   ml_purge(&sc->sc_tx_ml);
 }
 
 int
@@ -792,35 +807,91 @@ umb_input(struct ifnet *ifp, struct mbuf
return 1;
 }
 
+static inline int
+umb_align(size_t bufsz, int offs, int alignment, int remainder)
+{
+   size_t   m = alignment - 1;
+   int  align;
+
+   align = (((size_t)offs + m) & ~m) - alignment + remainder;
+   if (align < offs)
+   align += alignment;
+   if (align > bufsz)
+   align = bufsz;
+   return align - offs;
+}
+
+static inline int
+umb_padding(void *buf, size_t bufsz, int offs, int alignment, int remainder)
+{
+   int  nb;
+
+   nb = umb_align(bufsz, offs, alignment, remainder);
+   if (nb > 0)
+   memset(buf + offs, 0, nb);
+   return nb;
+}
+
 void
 umb_start(struct ifnet *ifp)
 {
struct umb_softc *sc = ifp->if_softc;
-   struct mbuf *m_head = NULL;
+   struct mbuf *m = NULL;
+   int  ndgram = 0;
+   int  offs, plen, len, mlen;
+   int  maxalign;
 
if (usbd_is_dying(sc->sc_udev) ||
!(ifp->if_flags & IFF_RUNNING) ||
    ifq_is_oactive(&ifp->if_snd))
return;
 
-   

Re: umb: aggregate packets on tx

2016-12-12 Thread Bryan Vyhmeister
On Mon, Dec 12, 2016 at 02:50:50PM +0100, Gerhard Roth wrote:
> The current umb(4) implementation needs one USB transfer for every packet
> that is sent. With the following patch, we can now aggregate several
> packets from the ifq into one single USB transfer.
> 
> This may speed up the tx path. And even if it doesn't, at least it
> reduces the number of transfers required.

I applied this diff this morning and everything has been working
smoothly so far. There are no obvious regressions that I have noticed.
This is on an X1 Carbon (4th Gen) with an EM7455 just like the X260 has.

Bryan



umb: aggregate packets on tx

2016-12-12 Thread Gerhard Roth
The current umb(4) implementation needs one USB transfer for every packet
that is sent. With the following patch, we can now aggregate several
packets from the ifq into one single USB transfer.

This may speed up the tx path. And even if it doesn't, at least it
reduces the number of transfers required.


Gerhard


Index: sys/dev/usb/if_umb.c
===
RCS file: /cvs/src/sys/dev/usb/if_umb.c,v
retrieving revision 1.8
diff -u -p -u -p -r1.8 if_umb.c
--- sys/dev/usb/if_umb.c25 Nov 2016 12:43:26 -  1.8
+++ sys/dev/usb/if_umb.c12 Dec 2016 13:38:34 -
@@ -156,7 +156,7 @@ int  umb_decode_connect_info(struct umb
 int umb_decode_ip_configuration(struct umb_softc *, void *, int);
 voidumb_rx(struct umb_softc *);
 voidumb_rxeof(struct usbd_xfer *, void *, usbd_status);
-int umb_encap(struct umb_softc *, struct mbuf *);
+int umb_encap(struct umb_softc *);
 voidumb_txeof(struct usbd_xfer *, void *, usbd_status);
 voidumb_decap(struct umb_softc *, struct usbd_xfer *);
 
@@ -299,6 +299,7 @@ umb_attach(struct device *parent, struct
 
sc->sc_udev = uaa->device;
sc->sc_ctrl_ifaceno = uaa->ifaceno;
+   ml_init(&sc->sc_tx_ml);
 
/*
 * Some MBIM hardware does not provide the mandatory CDC Union
@@ -583,8 +584,25 @@ umb_ncm_setup(struct umb_softc *sc)
UGETW(np.wLength) == sizeof (np)) {
sc->sc_rx_bufsz = UGETDW(np.dwNtbInMaxSize);
sc->sc_tx_bufsz = UGETDW(np.dwNtbOutMaxSize);
-   } else
+   sc->sc_maxdgram = UGETW(np.wNtbOutMaxDatagrams);
+   sc->sc_align = UGETW(np.wNdpOutAlignment);
+   sc->sc_ndp_div = UGETW(np.wNdpOutDivisor);
+   sc->sc_ndp_remainder = UGETW(np.wNdpOutPayloadRemainder);
+   /* Validate values */
+   if (!powerof2(sc->sc_align) || sc->sc_align == 0 ||
+   sc->sc_align >= sc->sc_tx_bufsz)
+   sc->sc_align = sizeof (uint32_t);
+   if (!powerof2(sc->sc_ndp_div) || sc->sc_ndp_div == 0 ||
+   sc->sc_ndp_div >= sc->sc_tx_bufsz)
+   sc->sc_ndp_div = sizeof (uint32_t);
+   if (sc->sc_ndp_remainder >= sc->sc_ndp_div)
+   sc->sc_ndp_remainder = 0;
+   } else {
sc->sc_rx_bufsz = sc->sc_tx_bufsz = 8 * 1024;
+   sc->sc_maxdgram = 0;
+   sc->sc_align = sc->sc_ndp_div = sizeof (uint32_t);
+   sc->sc_ndp_remainder = 0;
+   }
 }
 
 int
@@ -593,12 +611,12 @@ umb_alloc_xfers(struct umb_softc *sc)
if (!sc->sc_rx_xfer) {
if ((sc->sc_rx_xfer = usbd_alloc_xfer(sc->sc_udev)) != NULL)
sc->sc_rx_buf = usbd_alloc_buffer(sc->sc_rx_xfer,
-   sc->sc_rx_bufsz + MBIM_HDR32_LEN);
+   sc->sc_rx_bufsz);
}
if (!sc->sc_tx_xfer) {
if ((sc->sc_tx_xfer = usbd_alloc_xfer(sc->sc_udev)) != NULL)
sc->sc_tx_buf = usbd_alloc_buffer(sc->sc_tx_xfer,
-   sc->sc_tx_bufsz + MBIM_HDR16_LEN);
+   sc->sc_tx_bufsz);
}
return (sc->sc_rx_buf && sc->sc_tx_buf) ? 1 : 0;
 }
@@ -617,10 +635,7 @@ umb_free_xfers(struct umb_softc *sc)
sc->sc_tx_xfer = NULL;
sc->sc_tx_buf = NULL;
}
-   if (sc->sc_tx_m) {
-   m_freem(sc->sc_tx_m);
-   sc->sc_tx_m = NULL;
-   }
+   ml_purge(&sc->sc_tx_ml);
 }
 
 int
@@ -792,35 +807,91 @@ umb_input(struct ifnet *ifp, struct mbuf
return 1;
 }
 
+static inline int
+umb_align(size_t bufsz, int offs, int alignment, int remainder)
+{
+   size_t   m = alignment - 1;
+   int  align;
+
+   align = (((size_t)offs + m) & ~m) - alignment + remainder;
+   if (align < offs)
+   align += alignment;
+   if (align > bufsz)
+   align = bufsz;
+   return align - offs;
+}
+
+static inline int
+umb_padding(void *buf, size_t bufsz, int offs, int alignment, int remainder)
+{
+   int  nb;
+
+   nb = umb_align(bufsz, offs, alignment, remainder);
+   if (nb > 0)
+   memset(buf + offs, 0, nb);
+   return nb;
+}
+
 void
 umb_start(struct ifnet *ifp)
 {
struct umb_softc *sc = ifp->if_softc;
-   struct mbuf *m_head = NULL;
+   struct mbuf *m = NULL;
+   int  ndgram = 0;
+   int  offs, plen, len, mlen;
+   int  maxalign;
 
if (usbd_is_dying(sc->sc_udev) ||
!(ifp->if_flags & IFF_RUNNING) ||
    ifq_is_oactive(&ifp->if_snd))
return;
 
-   m_head = ifq_deq_begin(&ifp->if_snd);
-   if (m_head == NULL)
-   return;
    KASSERT(ml_empty(&sc->sc_tx_ml));
 
-   if (!umb_encap(sc, m_head)) {
-