Re: call if_input only once per Rx interrupt from net80211
On Tue, Sep 10, 2019 at 10:35:34PM +0200, Matthias Schmidt wrote: > Hi Stefan, > > * Stefan Sperling wrote: > > > > I think I see why. I forgot to convert some existing ieee80211_input() > > calls to ieee80211_inputm(), in ieee80211_input.c. > > These calls are related to buffered aggregated frames, so aggregated > > frames triggered multiple if_input() calls per interrupt again. > > > > In the first diff ieee80211_input() was putting aggregated frames > > onto the global mbuf list. With this new diff they get added to the > > mbuf list which the driver's rx interrupt handler passed in. > > > > Does this fix the issue? > > Yes, indeed. Download test files from a leaseweb mirror is now so fast > that my CPU fan starts spinning :) This time, only tested on iwm. > > Kudos for the quick fix! > > Matthias > Thanks for your help! Glad it's fixed :)
Re: call if_input only once per Rx interrupt from net80211
Hi Stefan, * Stefan Sperling wrote: > > I think I see why. I forgot to convert some existing ieee80211_input() > calls to ieee80211_inputm(), in ieee80211_input.c. > These calls are related to buffered aggregated frames, so aggregated > frames triggered multiple if_input() calls per interrupt again. > > In the first diff ieee80211_input() was putting aggregated frames > onto the global mbuf list. With this new diff they get added to the > mbuf list which the driver's rx interrupt handler passed in. > > Does this fix the issue? Yes, indeed. Download test files from a leaseweb mirror is now so fast that my CPU fan starts spinning :) This time, only tested on iwm. Kudos for the quick fix! Matthias
Re: call if_input only once per Rx interrupt from net80211
On Tue, Sep 10, 2019 at 07:08:14PM +0200, Matthias Schmidt wrote: > Hi Stefan, > > * Stefan Sperling wrote: > > > > New diff with above changes: > > I tested your new diff with two different systems: > > * Thinkpad T450s with iwm (8265, same as yesterday) > * Thinkpad X220 with iwn (6205) > > and on both systems I see a drastic regression compared to yesterday's > patch. The download speeds is around 400-700K/s on both systems. As > soon as I switch back to the kernel with yesterday's patch, I end up > having 4M/s in average, again. I think I see why. I forgot to convert some existing ieee80211_input() calls to ieee80211_inputm(), in ieee80211_input.c. These calls are related to buffered aggregated frames, so aggregated frames triggered multiple if_input() calls per interrupt again. In the first diff ieee80211_input() was putting aggregated frames onto the global mbuf list. With this new diff they get added to the mbuf list which the driver's rx interrupt handler passed in. Does this fix the issue? diff refs/heads/master refs/heads/ifqdrop blob - d72e8edceada8a680744a6b8478bb91ac9e15e6e blob + a3203d7eb1a67d478bf280a551a43f2dc66c0965 --- sys/dev/ic/ar5008.c +++ sys/dev/ic/ar5008.c @@ -789,7 +789,7 @@ ar5008_rx_radiotap(struct athn_softc *sc, struct mbuf #endif static __inline int -ar5008_rx_process(struct athn_softc *sc) +ar5008_rx_process(struct athn_softc *sc, struct mbuf_list *ml) { struct ieee80211com *ic = >sc_ic; struct ifnet *ifp = >ic_if; @@ -931,7 +931,7 @@ ar5008_rx_process(struct athn_softc *sc) rxi.rxi_rssi = MS(ds->ds_status4, AR_RXS4_RSSI_COMBINED); rxi.rxi_rssi += AR_DEFAULT_NOISE_FLOOR; rxi.rxi_tstamp = ds->ds_status2; - ieee80211_input(ifp, m, ni, ); + ieee80211_inputm(ifp, m, ni, , ml); /* Node is no longer needed. */ ieee80211_release_node(ic, ni); @@ -960,7 +960,13 @@ ar5008_rx_process(struct athn_softc *sc) void ar5008_rx_intr(struct athn_softc *sc) { - while (ar5008_rx_process(sc) == 0); + struct mbuf_list ml = MBUF_LIST_INITIALIZER(); + struct ieee80211com *ic = >sc_ic; + struct ifnet *ifp = >ic_if; + + while (ar5008_rx_process(sc, ) == 0); + + if_input(ifp, ); } int blob - 69ade5ade5a35e632a025db327668d695a0edd2d blob + dafa3bd1f0b4ca2124c6d963c82e289594e88b27 --- sys/dev/ic/ar9003.c +++ sys/dev/ic/ar9003.c @@ -83,7 +83,7 @@ void ar9003_reset_txsring(struct athn_softc *); void ar9003_rx_enable(struct athn_softc *); void ar9003_rx_radiotap(struct athn_softc *, struct mbuf *, struct ar_rx_status *); -intar9003_rx_process(struct athn_softc *, int); +intar9003_rx_process(struct athn_softc *, int, struct mbuf_list *); void ar9003_rx_intr(struct athn_softc *, int); intar9003_tx_process(struct athn_softc *); void ar9003_tx_intr(struct athn_softc *); @@ -916,7 +916,7 @@ ar9003_rx_radiotap(struct athn_softc *sc, struct mbuf #endif int -ar9003_rx_process(struct athn_softc *sc, int qid) +ar9003_rx_process(struct athn_softc *sc, int qid, struct mbuf_list *ml) { struct ieee80211com *ic = >sc_ic; struct ifnet *ifp = >ic_if; @@ -1036,7 +1036,7 @@ ar9003_rx_process(struct athn_softc *sc, int qid) rxi.rxi_flags = 0; /* XXX */ rxi.rxi_rssi = MS(ds->ds_status5, AR_RXS5_RSSI_COMBINED); rxi.rxi_tstamp = ds->ds_status3; - ieee80211_input(ifp, m, ni, ); + ieee80211_inputm(ifp, m, ni, , ml); /* Node is no longer needed. */ ieee80211_release_node(ic, ni); @@ -1066,7 +1066,13 @@ ar9003_rx_process(struct athn_softc *sc, int qid) void ar9003_rx_intr(struct athn_softc *sc, int qid) { - while (ar9003_rx_process(sc, qid) == 0); + struct mbuf_list ml = MBUF_LIST_INITIALIZER(); + struct ieee80211com *ic = >sc_ic; + struct ifnet *ifp = >ic_if; + + while (ar9003_rx_process(sc, qid, ) == 0); + + if_input(ifp, ); } int blob - c0c5f4241b010c5c38a557d97963fbdbc884336d blob + c4414e5113134012edaf4ce4557bb7e25987e9ef --- sys/dev/ic/ath.c +++ sys/dev/ic/ath.c @@ -1795,6 +1795,7 @@ ath_rxbuf_init(struct ath_softc *sc, struct ath_buf *b void ath_rx_proc(void *arg, int npending) { + struct mbuf_list ml = MBUF_LIST_INITIALIZER(); #definePA2DESC(_sc, _pa) \ ((struct ath_desc *)((caddr_t)(_sc)->sc_desc + \ ((_pa) - (_sc)->sc_desc_paddr))) @@ -1946,7 +1947,7 @@ ath_rx_proc(void *arg, int npending) if (!ath_softcrypto && (wh->i_fc[1] & IEEE80211_FC1_WEP)) { /* * WEP is decrypted by hardware. Clear WEP bit -* and trim WEP header for ieee80211_input(). +* and trim WEP header for ieee80211_inputm(). */ wh->i_fc[1] &= ~IEEE80211_FC1_WEP; bcopy(wh, , sizeof(whbuf)); @@ -1988,7 +1989,7 @@ ath_rx_proc(void *arg, int npending)
Re: call if_input only once per Rx interrupt from net80211
Hi Stefan, * Stefan Sperling wrote: > > New diff with above changes: I tested your new diff with two different systems: * Thinkpad T450s with iwm (8265, same as yesterday) * Thinkpad X220 with iwn (6205) and on both systems I see a drastic regression compared to yesterday's patch. The download speeds is around 400-700K/s on both systems. As soon as I switch back to the kernel with yesterday's patch, I end up having 4M/s in average, again. Cheers Matthias
Re: call if_input only once per Rx interrupt from net80211
On Mon, Sep 09, 2019 at 06:17:34PM -0300, Martin Pieuchot wrote: > On 09/09/19(Mon) 16:37, Stefan Sperling wrote: > > On Mon, Sep 09, 2019 at 03:10:04PM +0200, Stefan Sperling wrote: > > > The wifi stack currently calls if_input once per packet instead of once > > > per interrupt. To make the wifi layer play nicely with the network stack > > > we can split ieee80211_input() into two parts: > > > > Updated diff which avoids purging the input queue at every state > > change, e.g. even during SCAN->SCAN. With this we only purge the > > queue if we're leaving RUN state or going back to INIT state. > > Thanks a lot! I must say I looked at this in the past but got lost in > ieee80211_input(). > > Why not keep ieee80211_input() as a wrapper around your new mechanism? > This way you don't need to touch all drivers at once. We can keep ieee80211_input() for use by drivers that really only deliver one frame per interrupt. Now we don't need to touch some drivers at all. > I'd also suggest using a queue on-stack like we do for Ethernet drivers, > this would get rid of the cleanup of `ic_ml' when the state change. It > would also help developers familiar with Ethernet drivers to understand > what's happening ;o) Yes, thanks for this suggestion! Things make a lot more sense this way. > What about: > > ieee80211_enqueue(ifp, m, ni, , ); > ieee80211_inputm(ifp, ); I don't like the name ieee80211_enqueue() because while data frames are being enqueued, management frames are not enqueued. Instead, they cause immediate state changes in the net80211 stack such as allocation of a new node when a beacon is received state changes, e.g. RUN -> AUTH when a deauth frame is received. When drivers have the mbuf list on the stack we do not need a wrapper for if_input() since drivers can just call it directly. So I have chosen to use the following instead: ieee80211_inputm(ifp, m, ni, , ); if_input(ifp, ); With ieee80211_input() being a wrapper around these two calls. > Are you sure if_input() needs to be called at splnet()? I don't think > so because many pseudo-drivers call it at a different IPL. I did this only because I was afraid the global ic_ml might be accessed by hardware interrupts while we're running newstate() in a task as some drivers will do. This becomes a non-issue with mbuf lists on the stack. New diff with above changes: diff refs/heads/master refs/heads/ifqdrop blob - d72e8edceada8a680744a6b8478bb91ac9e15e6e blob + a3203d7eb1a67d478bf280a551a43f2dc66c0965 --- sys/dev/ic/ar5008.c +++ sys/dev/ic/ar5008.c @@ -789,7 +789,7 @@ ar5008_rx_radiotap(struct athn_softc *sc, struct mbuf #endif static __inline int -ar5008_rx_process(struct athn_softc *sc) +ar5008_rx_process(struct athn_softc *sc, struct mbuf_list *ml) { struct ieee80211com *ic = >sc_ic; struct ifnet *ifp = >ic_if; @@ -931,7 +931,7 @@ ar5008_rx_process(struct athn_softc *sc) rxi.rxi_rssi = MS(ds->ds_status4, AR_RXS4_RSSI_COMBINED); rxi.rxi_rssi += AR_DEFAULT_NOISE_FLOOR; rxi.rxi_tstamp = ds->ds_status2; - ieee80211_input(ifp, m, ni, ); + ieee80211_inputm(ifp, m, ni, , ml); /* Node is no longer needed. */ ieee80211_release_node(ic, ni); @@ -960,7 +960,13 @@ ar5008_rx_process(struct athn_softc *sc) void ar5008_rx_intr(struct athn_softc *sc) { - while (ar5008_rx_process(sc) == 0); + struct mbuf_list ml = MBUF_LIST_INITIALIZER(); + struct ieee80211com *ic = >sc_ic; + struct ifnet *ifp = >ic_if; + + while (ar5008_rx_process(sc, ) == 0); + + if_input(ifp, ); } int blob - 69ade5ade5a35e632a025db327668d695a0edd2d blob + dafa3bd1f0b4ca2124c6d963c82e289594e88b27 --- sys/dev/ic/ar9003.c +++ sys/dev/ic/ar9003.c @@ -83,7 +83,7 @@ void ar9003_reset_txsring(struct athn_softc *); void ar9003_rx_enable(struct athn_softc *); void ar9003_rx_radiotap(struct athn_softc *, struct mbuf *, struct ar_rx_status *); -intar9003_rx_process(struct athn_softc *, int); +intar9003_rx_process(struct athn_softc *, int, struct mbuf_list *); void ar9003_rx_intr(struct athn_softc *, int); intar9003_tx_process(struct athn_softc *); void ar9003_tx_intr(struct athn_softc *); @@ -916,7 +916,7 @@ ar9003_rx_radiotap(struct athn_softc *sc, struct mbuf #endif int -ar9003_rx_process(struct athn_softc *sc, int qid) +ar9003_rx_process(struct athn_softc *sc, int qid, struct mbuf_list *ml) { struct ieee80211com *ic = >sc_ic; struct ifnet *ifp = >ic_if; @@ -1036,7 +1036,7 @@ ar9003_rx_process(struct athn_softc *sc, int qid) rxi.rxi_flags = 0; /* XXX */ rxi.rxi_rssi = MS(ds->ds_status5, AR_RXS5_RSSI_COMBINED); rxi.rxi_tstamp = ds->ds_status3; - ieee80211_input(ifp, m, ni, ); + ieee80211_inputm(ifp, m, ni, , ml); /* Node is no longer needed. */ ieee80211_release_node(ic, ni); @@ -1066,7 +1066,13 @@ ar9003_rx_process(struct
Re: call if_input only once per Rx interrupt from net80211
On 09/09/19(Mon) 16:37, Stefan Sperling wrote: > On Mon, Sep 09, 2019 at 03:10:04PM +0200, Stefan Sperling wrote: > > The wifi stack currently calls if_input once per packet instead of once > > per interrupt. To make the wifi layer play nicely with the network stack > > we can split ieee80211_input() into two parts: > > Updated diff which avoids purging the input queue at every state > change, e.g. even during SCAN->SCAN. With this we only purge the > queue if we're leaving RUN state or going back to INIT state. Thanks a lot! I must say I looked at this in the past but got lost in ieee80211_input(). Why not keep ieee80211_input() as a wrapper around your new mechanism? This way you don't need to touch all drivers at once. I'd also suggest using a queue on-stack like we do for Ethernet drivers, this would get rid of the cleanup of `ic_ml' when the state change. It would also help developers familiar with Ethernet drivers to understand what's happening ;o) What about: ieee80211_enqueue(ifp, m, ni, , ); ieee80211_inputm(ifp, ); Are you sure if_input() needs to be called at splnet()? I don't think so because many pseudo-drivers call it at a different IPL. > diff refs/heads/master refs/heads/ifqdrop > blob - a1ca62ea1a4b5af7d1d1765eb1da131e15e21e4e > blob + bb0660f8da5ea340de57519e471b3a1a88c7da33 > --- sys/dev/ic/acx.c > +++ sys/dev/ic/acx.c > @@ -1398,6 +1398,7 @@ acx_rxeof(struct acx_softc *sc) > rxi.rxi_rssi = head->rbh_level; > rxi.rxi_tstamp = letoh32(head->rbh_time); > ieee80211_input(ifp, m, ni, ); > + ieee80211_input_flush(ifp); > > ieee80211_release_node(ic, ni); > } else { > blob - d3b9ada242c555f179dad1529c20a1748d4b7917 > blob + d62e42897867623911ae53637503a42178637603 > --- sys/dev/ic/an.c > +++ sys/dev/ic/an.c > @@ -477,6 +477,7 @@ an_rxeof(struct an_softc *sc) > rxi.rxi_rssi = frmhdr.an_rx_signal_strength; > rxi.rxi_tstamp = an_switch32(frmhdr.an_rx_time); > ieee80211_input(ifp, m, ni, ); > + ieee80211_input_flush(ifp); > ieee80211_release_node(ic, ni); > } > > blob - d72e8edceada8a680744a6b8478bb91ac9e15e6e > blob + 14dc84e692380a6a59b9fd9e3846f83e9a28e461 > --- sys/dev/ic/ar5008.c > +++ sys/dev/ic/ar5008.c > @@ -960,7 +960,12 @@ ar5008_rx_process(struct athn_softc *sc) > void > ar5008_rx_intr(struct athn_softc *sc) > { > + struct ieee80211com *ic = >sc_ic; > + struct ifnet *ifp = >ic_if; > + > while (ar5008_rx_process(sc) == 0); > + > + ieee80211_input_flush(ifp); > } > > int > blob - 69ade5ade5a35e632a025db327668d695a0edd2d > blob + c34c8dae63f268c4baa4b8be396e0a6f6af8d05b > --- sys/dev/ic/ar9003.c > +++ sys/dev/ic/ar9003.c > @@ -1066,7 +1066,12 @@ ar9003_rx_process(struct athn_softc *sc, int qid) > void > ar9003_rx_intr(struct athn_softc *sc, int qid) > { > + struct ieee80211com *ic = >sc_ic; > + struct ifnet *ifp = >ic_if; > + > while (ar9003_rx_process(sc, qid) == 0); > + > + ieee80211_input_flush(ifp); > } > > int > blob - c0c5f4241b010c5c38a557d97963fbdbc884336d > blob + 4d481bf2aada58fd7f9c484e3564db5b3d96d2b0 > --- sys/dev/ic/ath.c > +++ sys/dev/ic/ath.c > @@ -2005,6 +2005,8 @@ ath_rx_proc(void *arg, int npending) > TAILQ_INSERT_TAIL(>sc_rxbuf, bf, bf_list); > } while (ath_rxbuf_init(sc, bf) == 0); > > + ieee80211_input_flush(ifp); > + > ath_hal_set_rx_signal(ah); /* rx signal state monitoring */ > ath_hal_start_rx(ah); /* in case of RXEOL */ > #undef PA2DESC > blob - cbf21da74007080c510fa9c56a58aedff62fa586 > blob + 301b784e1b6878e0b024e17f7e515097fa6363d1 > --- sys/dev/ic/atw.c > +++ sys/dev/ic/atw.c > @@ -3191,6 +3191,7 @@ atw_rxintr(struct atw_softc *sc) >*/ > ieee80211_release_node(ic, ni); > } > + ieee80211_input_flush(ifp); > > /* Update the receive pointer. */ > sc->sc_rxptr = i; > blob - 7d6f2c5a5693881e2dffdd1814a757c9563196fc > blob + 9ce7309aeb96e7282fc8ac28e3cfc633e1d9f592 > --- sys/dev/ic/bwfm.c > +++ sys/dev/ic/bwfm.c > @@ -2118,6 +2118,7 @@ bwfm_rx_auth_ind(struct bwfm_softc *sc, struct bwfm_ev > rxi.rxi_rssi = 0; > rxi.rxi_tstamp = 0; > ieee80211_input(ifp, m, ic->ic_bss, ); > + ieee80211_input_flush(ifp); > } > > void > @@ -2174,6 +2175,7 @@ bwfm_rx_assoc_ind(struct bwfm_softc *sc, struct bwfm_e > rxi.rxi_rssi = 0; > rxi.rxi_tstamp = 0; > ieee80211_input(ifp, m, ni, ); > + ieee80211_input_flush(ifp); > } > > void > @@ -2229,6 +2231,7 @@ bwfm_rx_leave_ind(struct bwfm_softc *sc, struct bwfm_e > rxi.rxi_rssi = 0; > rxi.rxi_tstamp = 0; > ieee80211_input(ifp, m, ni, ); > + ieee80211_input_flush(ifp); > } > #endif > > @@ -2418,6 +2421,7 @@ bwfm_scan_node(struct bwfm_softc *sc, struct bwfm_bss_ > rxi.rxi_rssi =
Re: call if_input only once per Rx interrupt from net80211
Hi Stefan, * Stefan Sperling wrote: > On Mon, Sep 09, 2019 at 03:10:04PM +0200, Stefan Sperling wrote: > > The wifi stack currently calls if_input once per packet instead of once > > per interrupt. To make the wifi layer play nicely with the network stack > > we can split ieee80211_input() into two parts: > > Updated diff which avoids purging the input queue at every state > change, e.g. even during SCAN->SCAN. With this we only purge the > queue if we're leaving RUN state or going back to INIT state. Tested your diff with iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" So far, the diff provides amazing results. Without your diff the maximum speed of file downloads from my favorite mirror is around 1.2M/s in average. With your diff it is around 4.5M/s in average. Cheers Matthias
Re: call if_input only once per Rx interrupt from net80211
On Mon 09/09/2019 16:37, Stefan Sperling wrote: > On Mon, Sep 09, 2019 at 03:10:04PM +0200, Stefan Sperling wrote: > > The wifi stack currently calls if_input once per packet instead of once > > per interrupt. To make the wifi layer play nicely with the network stack > > we can split ieee80211_input() into two parts: > > Updated diff which avoids purging the input queue at every state > change, e.g. even during SCAN->SCAN. With this we only purge the > queue if we're leaving RUN state or going back to INIT state. I tested the updated diff by downloading a 400M file and looking at the throughput and Idrop before, and after each run. I also tried different values for net.link.ifrxq.pressure_drop. $ dmesg | grep iwn iwn0 at pci2 dev 0 function 0 "Intel Centrino Advanced-N 6205" rev 0x34: msi, MIMO 2T2R, MoW, address Before applying your diff: pressure_drop MB/sIdrop beforeIdrop after 8 3.0 21944117 32 4.3 41174345 128 4.1 43454345 After applying your diff: pressure_drop MB/sIdrop beforeIdrop after 8 5.4 33543606 32 6.2 36063606 128 6.2 36063606 It seems that your diff improves throughput, and reduces number of Idrops. At higher values of net.link.ifrxq.pressure_drop throughput is even higher and delta Idrop is 0.
Re: call if_input only once per Rx interrupt from net80211
this seems to work fine on iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless AC 7260" rev 0x83, msi download is considerably less wobbly, upload also benefits a bit(?): before: [florian@x1:~]$ tcpbench -t 10 192.168.178.73 elapsed_ms bytes mbps bwidth 10012683144 21.444 100.00% Conn: 1 Mbps: 21.444 Peak Mbps: 21.444 Avg Mbps: 21.444 20132894552 22.904 100.00% Conn: 1 Mbps: 22.904 Peak Mbps: 22.904 Avg Mbps: 22.904 3014 7717846.174 100.00% Conn: 1 Mbps:6.174 Peak Mbps: 22.904 Avg Mbps:6.174 40142380512 19.044 100.00% Conn: 1 Mbps: 19.044 Peak Mbps: 22.904 Avg Mbps: 19.044 50393042248 23.768 100.00% Conn: 1 Mbps: 23.768 Peak Mbps: 23.768 Avg Mbps: 23.768 60352858352 22.959 100.00% Conn: 1 Mbps: 22.959 Peak Mbps: 23.768 Avg Mbps: 22.959 70342875728 23.029 100.00% Conn: 1 Mbps: 23.029 Peak Mbps: 23.768 Avg Mbps: 23.029 80343701088 29.609 100.00% Conn: 1 Mbps: 29.609 Peak Mbps: 29.609 Avg Mbps: 29.609 90352927856 23.423 100.00% Conn: 1 Mbps: 23.423 Peak Mbps: 29.609 Avg Mbps: 23.423 100394019648 32.061 100.00% Conn: 1 Mbps: 32.061 Peak Mbps: 32.061 Avg Mbps: 32.061 [florian@x1:~]$ tcpbench -s elapsed_ms bytes mbps bwidth 10132939440 23.214 100.00% Conn: 1 Mbps: 23.214 Peak Mbps: 23.214 Avg Mbps: 23.214 2018 1274241.015 100.00% Conn: 1 Mbps:1.015 Peak Mbps: 23.214 Avg Mbps:1.015 30193453480 27.628 100.00% Conn: 1 Mbps: 27.628 Peak Mbps: 27.628 Avg Mbps: 27.628 40331781040 14.052 100.00% Conn: 1 Mbps: 14.052 Peak Mbps: 27.628 Avg Mbps: 14.052 5034 9281687.425 100.00% Conn: 1 Mbps:7.425 Peak Mbps: 27.628 Avg Mbps:7.425 60363386872 27.068 100.00% Conn: 1 Mbps: 27.068 Peak Mbps: 27.628 Avg Mbps: 27.068 70533233384 25.435 100.00% Conn: 1 Mbps: 25.435 Peak Mbps: 27.628 Avg Mbps: 25.435 80631796968 14.233 100.00% Conn: 1 Mbps: 14.233 Peak Mbps: 27.628 Avg Mbps: 14.233 9068 970160.773 100.00% Conn: 1 Mbps:0.773 Peak Mbps: 27.628 Avg Mbps:0.773 100692641152 21.129 100.00% Conn: 1 Mbps: 21.129 Peak Mbps: 27.628 Avg Mbps: 21.129 1107311062728.815 100.00% Conn: 1 Mbps:8.815 Peak Mbps: 27.628 Avg Mbps:8.815 after: [florian@x1:~]$ tcpbench -t 10 192.168.178.73 elapsed_ms bytes mbps bwidth 10043311576 26.387 100.00% Conn: 1 Mbps: 26.387 Peak Mbps: 26.387 Avg Mbps: 26.387 20063247864 25.957 100.00% Conn: 1 Mbps: 25.957 Peak Mbps: 26.387 Avg Mbps: 25.957 30483614208 27.748 100.00% Conn: 1 Mbps: 27.748 Peak Mbps: 27.748 Avg Mbps: 27.748 40503876296 30.979 100.00% Conn: 1 Mbps: 30.979 Peak Mbps: 30.979 Avg Mbps: 30.979 50543518640 28.065 100.00% Conn: 1 Mbps: 28.065 Peak Mbps: 30.979 Avg Mbps: 28.065 60533604072 28.861 100.00% Conn: 1 Mbps: 28.861 Peak Mbps: 30.979 Avg Mbps: 28.861 70543494024 27.924 100.00% Conn: 1 Mbps: 27.924 Peak Mbps: 30.979 Avg Mbps: 27.924 80553200080 25.601 100.00% Conn: 1 Mbps: 25.601 Peak Mbps: 30.979 Avg Mbps: 25.601 90563525880 28.179 100.00% Conn: 1 Mbps: 28.179 Peak Mbps: 30.979 Avg Mbps: 28.179 100573084240 24.649 100.00% Conn: 1 Mbps: 24.649 Peak Mbps: 30.979 Avg Mbps: 24.649 [florian@x1:~]$ tcpbench -s elapsed_ms bytes mbps bwidth 10024358480 34.798 100.00% Conn: 1 Mbps: 34.798 Peak Mbps: 34.798 Avg Mbps: 34.798 20135059312 40.034 100.00% Conn: 1 Mbps: 40.034 Peak Mbps: 40.034 Avg Mbps: 40.034 30114960848 39.766 100.00% Conn: 1 Mbps:
Re: call if_input only once per Rx interrupt from net80211
On Mon, Sep 09, 2019 at 03:10:04PM +0200, Stefan Sperling wrote: > The wifi stack currently calls if_input once per packet instead of once > per interrupt. To make the wifi layer play nicely with the network stack > we can split ieee80211_input() into two parts: Updated diff which avoids purging the input queue at every state change, e.g. even during SCAN->SCAN. With this we only purge the queue if we're leaving RUN state or going back to INIT state. diff refs/heads/master refs/heads/ifqdrop blob - a1ca62ea1a4b5af7d1d1765eb1da131e15e21e4e blob + bb0660f8da5ea340de57519e471b3a1a88c7da33 --- sys/dev/ic/acx.c +++ sys/dev/ic/acx.c @@ -1398,6 +1398,7 @@ acx_rxeof(struct acx_softc *sc) rxi.rxi_rssi = head->rbh_level; rxi.rxi_tstamp = letoh32(head->rbh_time); ieee80211_input(ifp, m, ni, ); + ieee80211_input_flush(ifp); ieee80211_release_node(ic, ni); } else { blob - d3b9ada242c555f179dad1529c20a1748d4b7917 blob + d62e42897867623911ae53637503a42178637603 --- sys/dev/ic/an.c +++ sys/dev/ic/an.c @@ -477,6 +477,7 @@ an_rxeof(struct an_softc *sc) rxi.rxi_rssi = frmhdr.an_rx_signal_strength; rxi.rxi_tstamp = an_switch32(frmhdr.an_rx_time); ieee80211_input(ifp, m, ni, ); + ieee80211_input_flush(ifp); ieee80211_release_node(ic, ni); } blob - d72e8edceada8a680744a6b8478bb91ac9e15e6e blob + 14dc84e692380a6a59b9fd9e3846f83e9a28e461 --- sys/dev/ic/ar5008.c +++ sys/dev/ic/ar5008.c @@ -960,7 +960,12 @@ ar5008_rx_process(struct athn_softc *sc) void ar5008_rx_intr(struct athn_softc *sc) { + struct ieee80211com *ic = >sc_ic; + struct ifnet *ifp = >ic_if; + while (ar5008_rx_process(sc) == 0); + + ieee80211_input_flush(ifp); } int blob - 69ade5ade5a35e632a025db327668d695a0edd2d blob + c34c8dae63f268c4baa4b8be396e0a6f6af8d05b --- sys/dev/ic/ar9003.c +++ sys/dev/ic/ar9003.c @@ -1066,7 +1066,12 @@ ar9003_rx_process(struct athn_softc *sc, int qid) void ar9003_rx_intr(struct athn_softc *sc, int qid) { + struct ieee80211com *ic = >sc_ic; + struct ifnet *ifp = >ic_if; + while (ar9003_rx_process(sc, qid) == 0); + + ieee80211_input_flush(ifp); } int blob - c0c5f4241b010c5c38a557d97963fbdbc884336d blob + 4d481bf2aada58fd7f9c484e3564db5b3d96d2b0 --- sys/dev/ic/ath.c +++ sys/dev/ic/ath.c @@ -2005,6 +2005,8 @@ ath_rx_proc(void *arg, int npending) TAILQ_INSERT_TAIL(>sc_rxbuf, bf, bf_list); } while (ath_rxbuf_init(sc, bf) == 0); + ieee80211_input_flush(ifp); + ath_hal_set_rx_signal(ah); /* rx signal state monitoring */ ath_hal_start_rx(ah); /* in case of RXEOL */ #undef PA2DESC blob - cbf21da74007080c510fa9c56a58aedff62fa586 blob + 301b784e1b6878e0b024e17f7e515097fa6363d1 --- sys/dev/ic/atw.c +++ sys/dev/ic/atw.c @@ -3191,6 +3191,7 @@ atw_rxintr(struct atw_softc *sc) */ ieee80211_release_node(ic, ni); } + ieee80211_input_flush(ifp); /* Update the receive pointer. */ sc->sc_rxptr = i; blob - 7d6f2c5a5693881e2dffdd1814a757c9563196fc blob + 9ce7309aeb96e7282fc8ac28e3cfc633e1d9f592 --- sys/dev/ic/bwfm.c +++ sys/dev/ic/bwfm.c @@ -2118,6 +2118,7 @@ bwfm_rx_auth_ind(struct bwfm_softc *sc, struct bwfm_ev rxi.rxi_rssi = 0; rxi.rxi_tstamp = 0; ieee80211_input(ifp, m, ic->ic_bss, ); + ieee80211_input_flush(ifp); } void @@ -2174,6 +2175,7 @@ bwfm_rx_assoc_ind(struct bwfm_softc *sc, struct bwfm_e rxi.rxi_rssi = 0; rxi.rxi_tstamp = 0; ieee80211_input(ifp, m, ni, ); + ieee80211_input_flush(ifp); } void @@ -2229,6 +2231,7 @@ bwfm_rx_leave_ind(struct bwfm_softc *sc, struct bwfm_e rxi.rxi_rssi = 0; rxi.rxi_tstamp = 0; ieee80211_input(ifp, m, ni, ); + ieee80211_input_flush(ifp); } #endif @@ -2418,6 +2421,7 @@ bwfm_scan_node(struct bwfm_softc *sc, struct bwfm_bss_ rxi.rxi_rssi = (int16_t)letoh16(bss->rssi); rxi.rxi_tstamp = 0; ieee80211_input(ifp, m, ni, ); + ieee80211_input_flush(ifp); /* Restore channel */ if (bss_chan) ni->ni_chan = bss_chan; blob - 58417ce6f7de644fe86347459c04553956b2edc0 blob + 25a40399a82089a496698f4a1fb8d88cb59375ac --- sys/dev/ic/bwi.c +++ sys/dev/ic/bwi.c @@ -8466,6 +8466,7 @@ bwi_rxeof(struct bwi_softc *sc, int end_idx) next: idx = (idx + 1) % BWI_RX_NDESC; } + ieee80211_input_flush(ifp); rbd->rbd_idx = idx; bus_dmamap_sync(sc->sc_dmat, rd->rdata_dmap, 0, blob - 6a8c5646f0b6f649a60793e127cbd746d8ee89d7 blob + bcd515d11a392f49f75914f1af6581b35b8cf054 --- sys/dev/ic/malo.c +++ sys/dev/ic/malo.c @@ -1727,6 +1727,7 @@ skip: sc->sc_rxring.cur = (sc->sc_rxring.cur + 1) % MALO_RX_RING_COUNT; } +
call if_input only once per Rx interrupt from net80211
robert@ noticed that iwm(4) Rx throughput increased after running: sysctl net.link.ifrxq.pressure_drop=32 The default value for this sysctl is 8. dlg@ explained the problem to me: [[[ the pressure stuff counts the number of times ifiq_input (which is called from if_input) gets called before a corresponding ifiq_process call gets run. if ifiq_input gets called a lot before ifiq_process runs, pressure builds up and after ifiq_pressure_drop calls it will shed load by dropping packets. ifiq_input is built to handle a list of packets built by processing the rx ring of a nic in a single interrupt. every hw interrupt should cause a single ifiq_input call. the list is hopefully processed in nettq in between the hw interrupts, but the pressure thresholds give you a bit of leeway. ]]] The wifi stack currently calls if_input once per packet instead of once per interrupt. To make the wifi layer play nicely with the network stack we can split ieee80211_input() into two parts: 1) enqueue frames pulled from hardware to an mbuf list: ieee80211_input() 1) deliver frames to network stack via if_input: ieee80211_input_flush() Existing ieee80211_input() calls don't need to be adjusted because enqueuing happens under the hood onto a new mbuf list in ieee80211com. Drivers call ieee80211_input_flush() once per Rx interrupt. This diff touches all drivers and should be tested on as many drivers as possible. While these changes are mostly mechanical, a lot of this touches legacy drivers which I cannot easily test. Please thoroughly review for errors. bwfm(4) does not use ieee80211_input() for data frames but has the same bug and will need to be fixed separately (patrick@ is aware). I expect that 11n mode in particular will benefit from this. ifq pressure drops can be fairly easily triggered by Rx aggregation. diff refs/heads/master refs/heads/ifqdrop blob - a1ca62ea1a4b5af7d1d1765eb1da131e15e21e4e blob + bb0660f8da5ea340de57519e471b3a1a88c7da33 --- sys/dev/ic/acx.c +++ sys/dev/ic/acx.c @@ -1398,6 +1398,7 @@ acx_rxeof(struct acx_softc *sc) rxi.rxi_rssi = head->rbh_level; rxi.rxi_tstamp = letoh32(head->rbh_time); ieee80211_input(ifp, m, ni, ); + ieee80211_input_flush(ifp); ieee80211_release_node(ic, ni); } else { blob - d3b9ada242c555f179dad1529c20a1748d4b7917 blob + d62e42897867623911ae53637503a42178637603 --- sys/dev/ic/an.c +++ sys/dev/ic/an.c @@ -477,6 +477,7 @@ an_rxeof(struct an_softc *sc) rxi.rxi_rssi = frmhdr.an_rx_signal_strength; rxi.rxi_tstamp = an_switch32(frmhdr.an_rx_time); ieee80211_input(ifp, m, ni, ); + ieee80211_input_flush(ifp); ieee80211_release_node(ic, ni); } blob - d72e8edceada8a680744a6b8478bb91ac9e15e6e blob + 14dc84e692380a6a59b9fd9e3846f83e9a28e461 --- sys/dev/ic/ar5008.c +++ sys/dev/ic/ar5008.c @@ -960,7 +960,12 @@ ar5008_rx_process(struct athn_softc *sc) void ar5008_rx_intr(struct athn_softc *sc) { + struct ieee80211com *ic = >sc_ic; + struct ifnet *ifp = >ic_if; + while (ar5008_rx_process(sc) == 0); + + ieee80211_input_flush(ifp); } int blob - 69ade5ade5a35e632a025db327668d695a0edd2d blob + c34c8dae63f268c4baa4b8be396e0a6f6af8d05b --- sys/dev/ic/ar9003.c +++ sys/dev/ic/ar9003.c @@ -1066,7 +1066,12 @@ ar9003_rx_process(struct athn_softc *sc, int qid) void ar9003_rx_intr(struct athn_softc *sc, int qid) { + struct ieee80211com *ic = >sc_ic; + struct ifnet *ifp = >ic_if; + while (ar9003_rx_process(sc, qid) == 0); + + ieee80211_input_flush(ifp); } int blob - c0c5f4241b010c5c38a557d97963fbdbc884336d blob + 4d481bf2aada58fd7f9c484e3564db5b3d96d2b0 --- sys/dev/ic/ath.c +++ sys/dev/ic/ath.c @@ -2005,6 +2005,8 @@ ath_rx_proc(void *arg, int npending) TAILQ_INSERT_TAIL(>sc_rxbuf, bf, bf_list); } while (ath_rxbuf_init(sc, bf) == 0); + ieee80211_input_flush(ifp); + ath_hal_set_rx_signal(ah); /* rx signal state monitoring */ ath_hal_start_rx(ah); /* in case of RXEOL */ #undef PA2DESC blob - cbf21da74007080c510fa9c56a58aedff62fa586 blob + 301b784e1b6878e0b024e17f7e515097fa6363d1 --- sys/dev/ic/atw.c +++ sys/dev/ic/atw.c @@ -3191,6 +3191,7 @@ atw_rxintr(struct atw_softc *sc) */ ieee80211_release_node(ic, ni); } + ieee80211_input_flush(ifp); /* Update the receive pointer. */ sc->sc_rxptr = i; blob - 7d6f2c5a5693881e2dffdd1814a757c9563196fc blob + 9ce7309aeb96e7282fc8ac28e3cfc633e1d9f592 --- sys/dev/ic/bwfm.c +++ sys/dev/ic/bwfm.c @@ -2118,6 +2118,7 @@ bwfm_rx_auth_ind(struct bwfm_softc *sc, struct bwfm_ev rxi.rxi_rssi = 0; rxi.rxi_tstamp = 0; ieee80211_input(ifp, m, ic->ic_bss, ); + ieee80211_input_flush(ifp); } void @@ -2174,6 +2175,7 @@ bwfm_rx_assoc_ind(struct bwfm_softc *sc, struct