Re: call if_input only once per Rx interrupt from net80211

2019-09-11 Thread Stefan Sperling
On Tue, Sep 10, 2019 at 10:35:34PM +0200, Matthias Schmidt wrote:
> Hi Stefan,
> 
> * Stefan Sperling wrote:
> > 
> > I think I see why. I forgot to convert some existing ieee80211_input()
> > calls to ieee80211_inputm(), in ieee80211_input.c.
> > These calls are related to buffered aggregated frames, so aggregated
> > frames triggered multiple if_input() calls per interrupt again.
> > 
> > In the first diff ieee80211_input() was putting aggregated frames
> > onto the global mbuf list. With this new diff they get added to the
> > mbuf list which the driver's rx interrupt handler passed in.
> > 
> > Does this fix the issue?
> 
> Yes, indeed.  Download test files from a leaseweb mirror is now so fast
> that my CPU fan starts spinning :)  This time, only tested on iwm.
> 
> Kudos for the quick fix!
> 
>   Matthias
> 

Thanks for your help! Glad it's fixed :)



Re: call if_input only once per Rx interrupt from net80211

2019-09-10 Thread Matthias Schmidt
Hi Stefan,

* Stefan Sperling wrote:
> 
> I think I see why. I forgot to convert some existing ieee80211_input()
> calls to ieee80211_inputm(), in ieee80211_input.c.
> These calls are related to buffered aggregated frames, so aggregated
> frames triggered multiple if_input() calls per interrupt again.
> 
> In the first diff ieee80211_input() was putting aggregated frames
> onto the global mbuf list. With this new diff they get added to the
> mbuf list which the driver's rx interrupt handler passed in.
> 
> Does this fix the issue?

Yes, indeed.  Download test files from a leaseweb mirror is now so fast
that my CPU fan starts spinning :)  This time, only tested on iwm.

Kudos for the quick fix!

Matthias



Re: call if_input only once per Rx interrupt from net80211

2019-09-10 Thread Stefan Sperling
On Tue, Sep 10, 2019 at 07:08:14PM +0200, Matthias Schmidt wrote:
> Hi Stefan,
> 
> * Stefan Sperling wrote:
> > 
> > New diff with above changes:
> 
> I tested your new diff with two different systems:
> 
> * Thinkpad T450s with iwm (8265, same as yesterday)
> * Thinkpad X220 with iwn (6205)
> 
> and on both systems I see a drastic regression compared to yesterday's
> patch.  The download speeds is around 400-700K/s on both systems.  As
> soon as I switch back to the kernel with yesterday's patch, I end up
> having 4M/s in average, again.

I think I see why. I forgot to convert some existing ieee80211_input()
calls to ieee80211_inputm(), in ieee80211_input.c.
These calls are related to buffered aggregated frames, so aggregated
frames triggered multiple if_input() calls per interrupt again.

In the first diff ieee80211_input() was putting aggregated frames
onto the global mbuf list. With this new diff they get added to the
mbuf list which the driver's rx interrupt handler passed in.

Does this fix the issue?

diff refs/heads/master refs/heads/ifqdrop
blob - d72e8edceada8a680744a6b8478bb91ac9e15e6e
blob + a3203d7eb1a67d478bf280a551a43f2dc66c0965
--- sys/dev/ic/ar5008.c
+++ sys/dev/ic/ar5008.c
@@ -789,7 +789,7 @@ ar5008_rx_radiotap(struct athn_softc *sc, struct mbuf 
 #endif
 
 static __inline int
-ar5008_rx_process(struct athn_softc *sc)
+ar5008_rx_process(struct athn_softc *sc, struct mbuf_list *ml)
 {
struct ieee80211com *ic = >sc_ic;
struct ifnet *ifp = >ic_if;
@@ -931,7 +931,7 @@ ar5008_rx_process(struct athn_softc *sc)
rxi.rxi_rssi = MS(ds->ds_status4, AR_RXS4_RSSI_COMBINED);
rxi.rxi_rssi += AR_DEFAULT_NOISE_FLOOR;
rxi.rxi_tstamp = ds->ds_status2;
-   ieee80211_input(ifp, m, ni, );
+   ieee80211_inputm(ifp, m, ni, , ml);
 
/* Node is no longer needed. */
ieee80211_release_node(ic, ni);
@@ -960,7 +960,13 @@ ar5008_rx_process(struct athn_softc *sc)
 void
 ar5008_rx_intr(struct athn_softc *sc)
 {
-   while (ar5008_rx_process(sc) == 0);
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+   struct ieee80211com *ic = >sc_ic;
+   struct ifnet *ifp = >ic_if;
+
+   while (ar5008_rx_process(sc, ) == 0);
+
+   if_input(ifp, );
 }
 
 int
blob - 69ade5ade5a35e632a025db327668d695a0edd2d
blob + dafa3bd1f0b4ca2124c6d963c82e289594e88b27
--- sys/dev/ic/ar9003.c
+++ sys/dev/ic/ar9003.c
@@ -83,7 +83,7 @@ void  ar9003_reset_txsring(struct athn_softc *);
 void   ar9003_rx_enable(struct athn_softc *);
 void   ar9003_rx_radiotap(struct athn_softc *, struct mbuf *,
struct ar_rx_status *);
-intar9003_rx_process(struct athn_softc *, int);
+intar9003_rx_process(struct athn_softc *, int, struct mbuf_list *);
 void   ar9003_rx_intr(struct athn_softc *, int);
 intar9003_tx_process(struct athn_softc *);
 void   ar9003_tx_intr(struct athn_softc *);
@@ -916,7 +916,7 @@ ar9003_rx_radiotap(struct athn_softc *sc, struct mbuf 
 #endif
 
 int
-ar9003_rx_process(struct athn_softc *sc, int qid)
+ar9003_rx_process(struct athn_softc *sc, int qid, struct mbuf_list *ml)
 {
struct ieee80211com *ic = >sc_ic;
struct ifnet *ifp = >ic_if;
@@ -1036,7 +1036,7 @@ ar9003_rx_process(struct athn_softc *sc, int qid)
rxi.rxi_flags = 0;  /* XXX */
rxi.rxi_rssi = MS(ds->ds_status5, AR_RXS5_RSSI_COMBINED);
rxi.rxi_tstamp = ds->ds_status3;
-   ieee80211_input(ifp, m, ni, );
+   ieee80211_inputm(ifp, m, ni, , ml);
 
/* Node is no longer needed. */
ieee80211_release_node(ic, ni);
@@ -1066,7 +1066,13 @@ ar9003_rx_process(struct athn_softc *sc, int qid)
 void
 ar9003_rx_intr(struct athn_softc *sc, int qid)
 {
-   while (ar9003_rx_process(sc, qid) == 0);
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+   struct ieee80211com *ic = >sc_ic;
+   struct ifnet *ifp = >ic_if;
+
+   while (ar9003_rx_process(sc, qid, ) == 0);
+
+   if_input(ifp, );
 }
 
 int
blob - c0c5f4241b010c5c38a557d97963fbdbc884336d
blob + c4414e5113134012edaf4ce4557bb7e25987e9ef
--- sys/dev/ic/ath.c
+++ sys/dev/ic/ath.c
@@ -1795,6 +1795,7 @@ ath_rxbuf_init(struct ath_softc *sc, struct ath_buf *b
 void
 ath_rx_proc(void *arg, int npending)
 {
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
 #definePA2DESC(_sc, _pa) \
((struct ath_desc *)((caddr_t)(_sc)->sc_desc + \
((_pa) - (_sc)->sc_desc_paddr)))
@@ -1946,7 +1947,7 @@ ath_rx_proc(void *arg, int npending)
if (!ath_softcrypto && (wh->i_fc[1] & IEEE80211_FC1_WEP)) {
/*
 * WEP is decrypted by hardware. Clear WEP bit
-* and trim WEP header for ieee80211_input().
+* and trim WEP header for ieee80211_inputm().
 */
wh->i_fc[1] &= ~IEEE80211_FC1_WEP;
bcopy(wh, , sizeof(whbuf));
@@ -1988,7 +1989,7 @@ ath_rx_proc(void *arg, int npending)
  

Re: call if_input only once per Rx interrupt from net80211

2019-09-10 Thread Matthias Schmidt
Hi Stefan,

* Stefan Sperling wrote:
> 
> New diff with above changes:

I tested your new diff with two different systems:

* Thinkpad T450s with iwm (8265, same as yesterday)
* Thinkpad X220 with iwn (6205)

and on both systems I see a drastic regression compared to yesterday's
patch.  The download speeds is around 400-700K/s on both systems.  As
soon as I switch back to the kernel with yesterday's patch, I end up
having 4M/s in average, again.

Cheers

Matthias



Re: call if_input only once per Rx interrupt from net80211

2019-09-10 Thread Stefan Sperling
On Mon, Sep 09, 2019 at 06:17:34PM -0300, Martin Pieuchot wrote:
> On 09/09/19(Mon) 16:37, Stefan Sperling wrote:
> > On Mon, Sep 09, 2019 at 03:10:04PM +0200, Stefan Sperling wrote:
> > > The wifi stack currently calls if_input once per packet instead of once
> > > per interrupt. To make the wifi layer play nicely with the network stack
> > > we can split ieee80211_input() into two parts:
> > 
> > Updated diff which avoids purging the input queue at every state
> > change, e.g. even during SCAN->SCAN. With this we only purge the
> > queue if we're leaving RUN state or going back to INIT state.
> 
> Thanks a lot!  I must say I looked at this in the past but got lost in
> ieee80211_input().
> 
> Why not keep ieee80211_input() as a wrapper around your new mechanism?
> This way you don't need to touch all drivers at once.

We can keep ieee80211_input() for use by drivers that really only deliver
one frame per interrupt. Now we don't need to touch some drivers at all.

> I'd also suggest using a queue on-stack like we do for Ethernet drivers,
> this would get rid of the cleanup of `ic_ml' when the state change.  It
> would also help developers familiar with Ethernet drivers to understand
> what's happening ;o)

Yes, thanks for this suggestion! Things make a lot more sense this way.

> What about:
> 
> ieee80211_enqueue(ifp, m, ni, , );
> ieee80211_inputm(ifp, );

I don't like the name ieee80211_enqueue() because while data frames are
being enqueued, management frames are not enqueued. Instead, they cause
immediate state changes in the net80211 stack such as allocation of a
new node when a beacon is received state changes, e.g. RUN -> AUTH when
a deauth frame is received.

When drivers have the mbuf list on the stack we do not need a wrapper
for if_input() since drivers can just call it directly.

So I have chosen to use the following instead:

  ieee80211_inputm(ifp, m, ni, , );
  if_input(ifp, );

With ieee80211_input() being a wrapper around these two calls.

> Are you sure if_input() needs to be called at splnet()?  I don't think
> so because many pseudo-drivers call it at a different IPL.

I did this only because I was afraid the global ic_ml might be accessed
by hardware interrupts while we're running newstate() in a task as some
drivers will do. This becomes a non-issue with mbuf lists on the stack.

New diff with above changes:

diff refs/heads/master refs/heads/ifqdrop
blob - d72e8edceada8a680744a6b8478bb91ac9e15e6e
blob + a3203d7eb1a67d478bf280a551a43f2dc66c0965
--- sys/dev/ic/ar5008.c
+++ sys/dev/ic/ar5008.c
@@ -789,7 +789,7 @@ ar5008_rx_radiotap(struct athn_softc *sc, struct mbuf 
 #endif
 
 static __inline int
-ar5008_rx_process(struct athn_softc *sc)
+ar5008_rx_process(struct athn_softc *sc, struct mbuf_list *ml)
 {
struct ieee80211com *ic = >sc_ic;
struct ifnet *ifp = >ic_if;
@@ -931,7 +931,7 @@ ar5008_rx_process(struct athn_softc *sc)
rxi.rxi_rssi = MS(ds->ds_status4, AR_RXS4_RSSI_COMBINED);
rxi.rxi_rssi += AR_DEFAULT_NOISE_FLOOR;
rxi.rxi_tstamp = ds->ds_status2;
-   ieee80211_input(ifp, m, ni, );
+   ieee80211_inputm(ifp, m, ni, , ml);
 
/* Node is no longer needed. */
ieee80211_release_node(ic, ni);
@@ -960,7 +960,13 @@ ar5008_rx_process(struct athn_softc *sc)
 void
 ar5008_rx_intr(struct athn_softc *sc)
 {
-   while (ar5008_rx_process(sc) == 0);
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+   struct ieee80211com *ic = >sc_ic;
+   struct ifnet *ifp = >ic_if;
+
+   while (ar5008_rx_process(sc, ) == 0);
+
+   if_input(ifp, );
 }
 
 int
blob - 69ade5ade5a35e632a025db327668d695a0edd2d
blob + dafa3bd1f0b4ca2124c6d963c82e289594e88b27
--- sys/dev/ic/ar9003.c
+++ sys/dev/ic/ar9003.c
@@ -83,7 +83,7 @@ void  ar9003_reset_txsring(struct athn_softc *);
 void   ar9003_rx_enable(struct athn_softc *);
 void   ar9003_rx_radiotap(struct athn_softc *, struct mbuf *,
struct ar_rx_status *);
-intar9003_rx_process(struct athn_softc *, int);
+intar9003_rx_process(struct athn_softc *, int, struct mbuf_list *);
 void   ar9003_rx_intr(struct athn_softc *, int);
 intar9003_tx_process(struct athn_softc *);
 void   ar9003_tx_intr(struct athn_softc *);
@@ -916,7 +916,7 @@ ar9003_rx_radiotap(struct athn_softc *sc, struct mbuf 
 #endif
 
 int
-ar9003_rx_process(struct athn_softc *sc, int qid)
+ar9003_rx_process(struct athn_softc *sc, int qid, struct mbuf_list *ml)
 {
struct ieee80211com *ic = >sc_ic;
struct ifnet *ifp = >ic_if;
@@ -1036,7 +1036,7 @@ ar9003_rx_process(struct athn_softc *sc, int qid)
rxi.rxi_flags = 0;  /* XXX */
rxi.rxi_rssi = MS(ds->ds_status5, AR_RXS5_RSSI_COMBINED);
rxi.rxi_tstamp = ds->ds_status3;
-   ieee80211_input(ifp, m, ni, );
+   ieee80211_inputm(ifp, m, ni, , ml);
 
/* Node is no longer needed. */
ieee80211_release_node(ic, ni);
@@ -1066,7 +1066,13 @@ ar9003_rx_process(struct 

Re: call if_input only once per Rx interrupt from net80211

2019-09-09 Thread Martin Pieuchot
On 09/09/19(Mon) 16:37, Stefan Sperling wrote:
> On Mon, Sep 09, 2019 at 03:10:04PM +0200, Stefan Sperling wrote:
> > The wifi stack currently calls if_input once per packet instead of once
> > per interrupt. To make the wifi layer play nicely with the network stack
> > we can split ieee80211_input() into two parts:
> 
> Updated diff which avoids purging the input queue at every state
> change, e.g. even during SCAN->SCAN. With this we only purge the
> queue if we're leaving RUN state or going back to INIT state.

Thanks a lot!  I must say I looked at this in the past but got lost in
ieee80211_input().

Why not keep ieee80211_input() as a wrapper around your new mechanism?
This way you don't need to touch all drivers at once.

I'd also suggest using a queue on-stack like we do for Ethernet drivers,
this would get rid of the cleanup of `ic_ml' when the state change.  It
would also help developers familiar with Ethernet drivers to understand
what's happening ;o)

What about:

  ieee80211_enqueue(ifp, m, ni, , );
  ieee80211_inputm(ifp, );

Are you sure if_input() needs to be called at splnet()?  I don't think
so because many pseudo-drivers call it at a different IPL.

> diff refs/heads/master refs/heads/ifqdrop
> blob - a1ca62ea1a4b5af7d1d1765eb1da131e15e21e4e
> blob + bb0660f8da5ea340de57519e471b3a1a88c7da33
> --- sys/dev/ic/acx.c
> +++ sys/dev/ic/acx.c
> @@ -1398,6 +1398,7 @@ acx_rxeof(struct acx_softc *sc)
>   rxi.rxi_rssi = head->rbh_level;
>   rxi.rxi_tstamp = letoh32(head->rbh_time);
>   ieee80211_input(ifp, m, ni, );
> + ieee80211_input_flush(ifp);
>  
>   ieee80211_release_node(ic, ni);
>   } else {
> blob - d3b9ada242c555f179dad1529c20a1748d4b7917
> blob + d62e42897867623911ae53637503a42178637603
> --- sys/dev/ic/an.c
> +++ sys/dev/ic/an.c
> @@ -477,6 +477,7 @@ an_rxeof(struct an_softc *sc)
>   rxi.rxi_rssi = frmhdr.an_rx_signal_strength;
>   rxi.rxi_tstamp = an_switch32(frmhdr.an_rx_time);
>   ieee80211_input(ifp, m, ni, );
> + ieee80211_input_flush(ifp);
>   ieee80211_release_node(ic, ni);
>  }
>  
> blob - d72e8edceada8a680744a6b8478bb91ac9e15e6e
> blob + 14dc84e692380a6a59b9fd9e3846f83e9a28e461
> --- sys/dev/ic/ar5008.c
> +++ sys/dev/ic/ar5008.c
> @@ -960,7 +960,12 @@ ar5008_rx_process(struct athn_softc *sc)
>  void
>  ar5008_rx_intr(struct athn_softc *sc)
>  {
> + struct ieee80211com *ic = >sc_ic;
> + struct ifnet *ifp = >ic_if;
> +
>   while (ar5008_rx_process(sc) == 0);
> +
> + ieee80211_input_flush(ifp);
>  }
>  
>  int
> blob - 69ade5ade5a35e632a025db327668d695a0edd2d
> blob + c34c8dae63f268c4baa4b8be396e0a6f6af8d05b
> --- sys/dev/ic/ar9003.c
> +++ sys/dev/ic/ar9003.c
> @@ -1066,7 +1066,12 @@ ar9003_rx_process(struct athn_softc *sc, int qid)
>  void
>  ar9003_rx_intr(struct athn_softc *sc, int qid)
>  {
> + struct ieee80211com *ic = >sc_ic;
> + struct ifnet *ifp = >ic_if;
> +
>   while (ar9003_rx_process(sc, qid) == 0);
> +
> + ieee80211_input_flush(ifp);
>  }
>  
>  int
> blob - c0c5f4241b010c5c38a557d97963fbdbc884336d
> blob + 4d481bf2aada58fd7f9c484e3564db5b3d96d2b0
> --- sys/dev/ic/ath.c
> +++ sys/dev/ic/ath.c
> @@ -2005,6 +2005,8 @@ ath_rx_proc(void *arg, int npending)
>   TAILQ_INSERT_TAIL(>sc_rxbuf, bf, bf_list);
>   } while (ath_rxbuf_init(sc, bf) == 0);
>  
> + ieee80211_input_flush(ifp);
> +
>   ath_hal_set_rx_signal(ah);  /* rx signal state monitoring */
>   ath_hal_start_rx(ah);   /* in case of RXEOL */
>  #undef PA2DESC
> blob - cbf21da74007080c510fa9c56a58aedff62fa586
> blob + 301b784e1b6878e0b024e17f7e515097fa6363d1
> --- sys/dev/ic/atw.c
> +++ sys/dev/ic/atw.c
> @@ -3191,6 +3191,7 @@ atw_rxintr(struct atw_softc *sc)
>*/
>   ieee80211_release_node(ic, ni);
>   }
> + ieee80211_input_flush(ifp);
>  
>   /* Update the receive pointer. */
>   sc->sc_rxptr = i;
> blob - 7d6f2c5a5693881e2dffdd1814a757c9563196fc
> blob + 9ce7309aeb96e7282fc8ac28e3cfc633e1d9f592
> --- sys/dev/ic/bwfm.c
> +++ sys/dev/ic/bwfm.c
> @@ -2118,6 +2118,7 @@ bwfm_rx_auth_ind(struct bwfm_softc *sc, struct bwfm_ev
>   rxi.rxi_rssi = 0;
>   rxi.rxi_tstamp = 0;
>   ieee80211_input(ifp, m, ic->ic_bss, );
> + ieee80211_input_flush(ifp);
>  }
>  
>  void
> @@ -2174,6 +2175,7 @@ bwfm_rx_assoc_ind(struct bwfm_softc *sc, struct bwfm_e
>   rxi.rxi_rssi = 0;
>   rxi.rxi_tstamp = 0;
>   ieee80211_input(ifp, m, ni, );
> + ieee80211_input_flush(ifp);
>  }
>  
>  void
> @@ -2229,6 +2231,7 @@ bwfm_rx_leave_ind(struct bwfm_softc *sc, struct bwfm_e
>   rxi.rxi_rssi = 0;
>   rxi.rxi_tstamp = 0;
>   ieee80211_input(ifp, m, ni, );
> + ieee80211_input_flush(ifp);
>  }
>  #endif
>  
> @@ -2418,6 +2421,7 @@ bwfm_scan_node(struct bwfm_softc *sc, struct bwfm_bss_
>   rxi.rxi_rssi = 

Re: call if_input only once per Rx interrupt from net80211

2019-09-09 Thread Matthias Schmidt
Hi Stefan,

* Stefan Sperling wrote:
> On Mon, Sep 09, 2019 at 03:10:04PM +0200, Stefan Sperling wrote:
> > The wifi stack currently calls if_input once per packet instead of once
> > per interrupt. To make the wifi layer play nicely with the network stack
> > we can split ieee80211_input() into two parts:
> 
> Updated diff which avoids purging the input queue at every state
> change, e.g. even during SCAN->SCAN. With this we only purge the
> queue if we're leaving RUN state or going back to INIT state.

Tested your diff with

iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265"

So far, the diff provides amazing results.  Without your diff the
maximum speed of file downloads from my favorite mirror is around
1.2M/s in average.  With your diff it is around 4.5M/s in average.

Cheers
Matthias



Re: call if_input only once per Rx interrupt from net80211

2019-09-09 Thread Björn Ketelaars
On Mon 09/09/2019 16:37, Stefan Sperling wrote:
> On Mon, Sep 09, 2019 at 03:10:04PM +0200, Stefan Sperling wrote:
> > The wifi stack currently calls if_input once per packet instead of once
> > per interrupt. To make the wifi layer play nicely with the network stack
> > we can split ieee80211_input() into two parts:
> 
> Updated diff which avoids purging the input queue at every state
> change, e.g. even during SCAN->SCAN. With this we only purge the
> queue if we're leaving RUN state or going back to INIT state.

I tested the updated diff by downloading a 400M file and looking at the
throughput and Idrop before, and after each run. I also tried different
values for net.link.ifrxq.pressure_drop.

$ dmesg | grep iwn
iwn0 at pci2 dev 0 function 0 "Intel Centrino Advanced-N 6205" rev 0x34:
msi, MIMO 2T2R, MoW, address

Before applying your diff:
pressure_drop   MB/sIdrop beforeIdrop after
8   3.0 21944117
32  4.3 41174345
128 4.1 43454345

After applying your diff:
pressure_drop   MB/sIdrop beforeIdrop after
8   5.4 33543606
32  6.2 36063606
128 6.2 36063606

It seems that your diff improves throughput, and reduces number of
Idrops. At higher values of net.link.ifrxq.pressure_drop throughput is
even higher and delta Idrop is 0.



Re: call if_input only once per Rx interrupt from net80211

2019-09-09 Thread Florian Obser
this seems to work fine on
iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless AC 7260" rev 0x83, msi

download is considerably less wobbly, upload also benefits a bit(?):

before:

[florian@x1:~]$ tcpbench -t 10 192.168.178.73
  elapsed_ms  bytes mbps   bwidth 
10012683144   21.444  100.00% 
Conn:   1 Mbps:   21.444 Peak Mbps:   21.444 Avg Mbps:   21.444
20132894552   22.904  100.00% 
Conn:   1 Mbps:   22.904 Peak Mbps:   22.904 Avg Mbps:   22.904
3014 7717846.174  100.00% 
Conn:   1 Mbps:6.174 Peak Mbps:   22.904 Avg Mbps:6.174
40142380512   19.044  100.00% 
Conn:   1 Mbps:   19.044 Peak Mbps:   22.904 Avg Mbps:   19.044
50393042248   23.768  100.00% 
Conn:   1 Mbps:   23.768 Peak Mbps:   23.768 Avg Mbps:   23.768
60352858352   22.959  100.00% 
Conn:   1 Mbps:   22.959 Peak Mbps:   23.768 Avg Mbps:   22.959
70342875728   23.029  100.00% 
Conn:   1 Mbps:   23.029 Peak Mbps:   23.768 Avg Mbps:   23.029
80343701088   29.609  100.00% 
Conn:   1 Mbps:   29.609 Peak Mbps:   29.609 Avg Mbps:   29.609
90352927856   23.423  100.00% 
Conn:   1 Mbps:   23.423 Peak Mbps:   29.609 Avg Mbps:   23.423
   100394019648   32.061  100.00% 
Conn:   1 Mbps:   32.061 Peak Mbps:   32.061 Avg Mbps:   32.061

[florian@x1:~]$ tcpbench -s
  elapsed_ms  bytes mbps   bwidth 
10132939440   23.214  100.00% 
Conn:   1 Mbps:   23.214 Peak Mbps:   23.214 Avg Mbps:   23.214
2018 1274241.015  100.00% 
Conn:   1 Mbps:1.015 Peak Mbps:   23.214 Avg Mbps:1.015
30193453480   27.628  100.00% 
Conn:   1 Mbps:   27.628 Peak Mbps:   27.628 Avg Mbps:   27.628
40331781040   14.052  100.00% 
Conn:   1 Mbps:   14.052 Peak Mbps:   27.628 Avg Mbps:   14.052
5034 9281687.425  100.00% 
Conn:   1 Mbps:7.425 Peak Mbps:   27.628 Avg Mbps:7.425
60363386872   27.068  100.00% 
Conn:   1 Mbps:   27.068 Peak Mbps:   27.628 Avg Mbps:   27.068
70533233384   25.435  100.00% 
Conn:   1 Mbps:   25.435 Peak Mbps:   27.628 Avg Mbps:   25.435
80631796968   14.233  100.00% 
Conn:   1 Mbps:   14.233 Peak Mbps:   27.628 Avg Mbps:   14.233
9068  970160.773  100.00% 
Conn:   1 Mbps:0.773 Peak Mbps:   27.628 Avg Mbps:0.773
   100692641152   21.129  100.00% 
Conn:   1 Mbps:   21.129 Peak Mbps:   27.628 Avg Mbps:   21.129
   1107311062728.815  100.00% 
Conn:   1 Mbps:8.815 Peak Mbps:   27.628 Avg Mbps:8.815



after:

[florian@x1:~]$ tcpbench -t 10 192.168.178.73 
  elapsed_ms  bytes mbps   bwidth 
10043311576   26.387  100.00% 
Conn:   1 Mbps:   26.387 Peak Mbps:   26.387 Avg Mbps:   26.387
20063247864   25.957  100.00% 
Conn:   1 Mbps:   25.957 Peak Mbps:   26.387 Avg Mbps:   25.957
30483614208   27.748  100.00% 
Conn:   1 Mbps:   27.748 Peak Mbps:   27.748 Avg Mbps:   27.748
40503876296   30.979  100.00% 
Conn:   1 Mbps:   30.979 Peak Mbps:   30.979 Avg Mbps:   30.979
50543518640   28.065  100.00% 
Conn:   1 Mbps:   28.065 Peak Mbps:   30.979 Avg Mbps:   28.065
60533604072   28.861  100.00% 
Conn:   1 Mbps:   28.861 Peak Mbps:   30.979 Avg Mbps:   28.861
70543494024   27.924  100.00% 
Conn:   1 Mbps:   27.924 Peak Mbps:   30.979 Avg Mbps:   27.924
80553200080   25.601  100.00% 
Conn:   1 Mbps:   25.601 Peak Mbps:   30.979 Avg Mbps:   25.601
90563525880   28.179  100.00% 
Conn:   1 Mbps:   28.179 Peak Mbps:   30.979 Avg Mbps:   28.179
   100573084240   24.649  100.00% 
Conn:   1 Mbps:   24.649 Peak Mbps:   30.979 Avg Mbps:   24.649

[florian@x1:~]$ tcpbench -s
  elapsed_ms  bytes mbps   bwidth 
10024358480   34.798  100.00% 
Conn:   1 Mbps:   34.798 Peak Mbps:   34.798 Avg Mbps:   34.798
20135059312   40.034  100.00% 
Conn:   1 Mbps:   40.034 Peak Mbps:   40.034 Avg Mbps:   40.034
30114960848   39.766  100.00% 
Conn:   1 Mbps:   

Re: call if_input only once per Rx interrupt from net80211

2019-09-09 Thread Stefan Sperling
On Mon, Sep 09, 2019 at 03:10:04PM +0200, Stefan Sperling wrote:
> The wifi stack currently calls if_input once per packet instead of once
> per interrupt. To make the wifi layer play nicely with the network stack
> we can split ieee80211_input() into two parts:

Updated diff which avoids purging the input queue at every state
change, e.g. even during SCAN->SCAN. With this we only purge the
queue if we're leaving RUN state or going back to INIT state.

diff refs/heads/master refs/heads/ifqdrop
blob - a1ca62ea1a4b5af7d1d1765eb1da131e15e21e4e
blob + bb0660f8da5ea340de57519e471b3a1a88c7da33
--- sys/dev/ic/acx.c
+++ sys/dev/ic/acx.c
@@ -1398,6 +1398,7 @@ acx_rxeof(struct acx_softc *sc)
rxi.rxi_rssi = head->rbh_level;
rxi.rxi_tstamp = letoh32(head->rbh_time);
ieee80211_input(ifp, m, ni, );
+   ieee80211_input_flush(ifp);
 
ieee80211_release_node(ic, ni);
} else {
blob - d3b9ada242c555f179dad1529c20a1748d4b7917
blob + d62e42897867623911ae53637503a42178637603
--- sys/dev/ic/an.c
+++ sys/dev/ic/an.c
@@ -477,6 +477,7 @@ an_rxeof(struct an_softc *sc)
rxi.rxi_rssi = frmhdr.an_rx_signal_strength;
rxi.rxi_tstamp = an_switch32(frmhdr.an_rx_time);
ieee80211_input(ifp, m, ni, );
+   ieee80211_input_flush(ifp);
ieee80211_release_node(ic, ni);
 }
 
blob - d72e8edceada8a680744a6b8478bb91ac9e15e6e
blob + 14dc84e692380a6a59b9fd9e3846f83e9a28e461
--- sys/dev/ic/ar5008.c
+++ sys/dev/ic/ar5008.c
@@ -960,7 +960,12 @@ ar5008_rx_process(struct athn_softc *sc)
 void
 ar5008_rx_intr(struct athn_softc *sc)
 {
+   struct ieee80211com *ic = >sc_ic;
+   struct ifnet *ifp = >ic_if;
+
while (ar5008_rx_process(sc) == 0);
+
+   ieee80211_input_flush(ifp);
 }
 
 int
blob - 69ade5ade5a35e632a025db327668d695a0edd2d
blob + c34c8dae63f268c4baa4b8be396e0a6f6af8d05b
--- sys/dev/ic/ar9003.c
+++ sys/dev/ic/ar9003.c
@@ -1066,7 +1066,12 @@ ar9003_rx_process(struct athn_softc *sc, int qid)
 void
 ar9003_rx_intr(struct athn_softc *sc, int qid)
 {
+   struct ieee80211com *ic = >sc_ic;
+   struct ifnet *ifp = >ic_if;
+
while (ar9003_rx_process(sc, qid) == 0);
+
+   ieee80211_input_flush(ifp);
 }
 
 int
blob - c0c5f4241b010c5c38a557d97963fbdbc884336d
blob + 4d481bf2aada58fd7f9c484e3564db5b3d96d2b0
--- sys/dev/ic/ath.c
+++ sys/dev/ic/ath.c
@@ -2005,6 +2005,8 @@ ath_rx_proc(void *arg, int npending)
TAILQ_INSERT_TAIL(>sc_rxbuf, bf, bf_list);
} while (ath_rxbuf_init(sc, bf) == 0);
 
+   ieee80211_input_flush(ifp);
+
ath_hal_set_rx_signal(ah);  /* rx signal state monitoring */
ath_hal_start_rx(ah);   /* in case of RXEOL */
 #undef PA2DESC
blob - cbf21da74007080c510fa9c56a58aedff62fa586
blob + 301b784e1b6878e0b024e17f7e515097fa6363d1
--- sys/dev/ic/atw.c
+++ sys/dev/ic/atw.c
@@ -3191,6 +3191,7 @@ atw_rxintr(struct atw_softc *sc)
 */
ieee80211_release_node(ic, ni);
}
+   ieee80211_input_flush(ifp);
 
/* Update the receive pointer. */
sc->sc_rxptr = i;
blob - 7d6f2c5a5693881e2dffdd1814a757c9563196fc
blob + 9ce7309aeb96e7282fc8ac28e3cfc633e1d9f592
--- sys/dev/ic/bwfm.c
+++ sys/dev/ic/bwfm.c
@@ -2118,6 +2118,7 @@ bwfm_rx_auth_ind(struct bwfm_softc *sc, struct bwfm_ev
rxi.rxi_rssi = 0;
rxi.rxi_tstamp = 0;
ieee80211_input(ifp, m, ic->ic_bss, );
+   ieee80211_input_flush(ifp);
 }
 
 void
@@ -2174,6 +2175,7 @@ bwfm_rx_assoc_ind(struct bwfm_softc *sc, struct bwfm_e
rxi.rxi_rssi = 0;
rxi.rxi_tstamp = 0;
ieee80211_input(ifp, m, ni, );
+   ieee80211_input_flush(ifp);
 }
 
 void
@@ -2229,6 +2231,7 @@ bwfm_rx_leave_ind(struct bwfm_softc *sc, struct bwfm_e
rxi.rxi_rssi = 0;
rxi.rxi_tstamp = 0;
ieee80211_input(ifp, m, ni, );
+   ieee80211_input_flush(ifp);
 }
 #endif
 
@@ -2418,6 +2421,7 @@ bwfm_scan_node(struct bwfm_softc *sc, struct bwfm_bss_
rxi.rxi_rssi = (int16_t)letoh16(bss->rssi);
rxi.rxi_tstamp = 0;
ieee80211_input(ifp, m, ni, );
+   ieee80211_input_flush(ifp);
/* Restore channel */
if (bss_chan)
ni->ni_chan = bss_chan;
blob - 58417ce6f7de644fe86347459c04553956b2edc0
blob + 25a40399a82089a496698f4a1fb8d88cb59375ac
--- sys/dev/ic/bwi.c
+++ sys/dev/ic/bwi.c
@@ -8466,6 +8466,7 @@ bwi_rxeof(struct bwi_softc *sc, int end_idx)
 next:
idx = (idx + 1) % BWI_RX_NDESC;
}
+   ieee80211_input_flush(ifp);
 
rbd->rbd_idx = idx;
bus_dmamap_sync(sc->sc_dmat, rd->rdata_dmap, 0,
blob - 6a8c5646f0b6f649a60793e127cbd746d8ee89d7
blob + bcd515d11a392f49f75914f1af6581b35b8cf054
--- sys/dev/ic/malo.c
+++ sys/dev/ic/malo.c
@@ -1727,6 +1727,7 @@ skip:
sc->sc_rxring.cur = (sc->sc_rxring.cur + 1) %
MALO_RX_RING_COUNT;
}
+   

call if_input only once per Rx interrupt from net80211

2019-09-09 Thread Stefan Sperling
robert@ noticed that iwm(4) Rx throughput increased after running:
sysctl net.link.ifrxq.pressure_drop=32
The default value for this sysctl is 8.

dlg@ explained the problem to me:
[[[
the pressure stuff counts the number of times ifiq_input (which is
called from if_input) gets called before a corresponding ifiq_process
call gets run. if ifiq_input gets called a lot before ifiq_process runs,
pressure builds up and after ifiq_pressure_drop calls it will shed load
by dropping packets.

ifiq_input is built to handle a list of packets built by processing
the rx ring of a nic in a single interrupt. every hw interrupt should
cause a single ifiq_input call. the list is hopefully processed in nettq
in between the hw interrupts, but the pressure thresholds give you a bit
of leeway.
]]]

The wifi stack currently calls if_input once per packet instead of once
per interrupt. To make the wifi layer play nicely with the network stack
we can split ieee80211_input() into two parts:
 
1) enqueue frames pulled from hardware to an mbuf list: ieee80211_input()
1) deliver frames to network stack via if_input: ieee80211_input_flush()

Existing ieee80211_input() calls don't need to be adjusted because
enqueuing happens under the hood onto a new mbuf list in ieee80211com.
Drivers call ieee80211_input_flush() once per Rx interrupt.

This diff touches all drivers and should be tested on as many drivers
as possible. While these changes are mostly mechanical, a lot of this
touches legacy drivers which I cannot easily test.
Please thoroughly review for errors.

bwfm(4) does not use ieee80211_input() for data frames but has the
same bug and will need to be fixed separately (patrick@ is aware).

I expect that 11n mode in particular will benefit from this.
ifq pressure drops can be fairly easily triggered by Rx aggregation.

diff refs/heads/master refs/heads/ifqdrop
blob - a1ca62ea1a4b5af7d1d1765eb1da131e15e21e4e
blob + bb0660f8da5ea340de57519e471b3a1a88c7da33
--- sys/dev/ic/acx.c
+++ sys/dev/ic/acx.c
@@ -1398,6 +1398,7 @@ acx_rxeof(struct acx_softc *sc)
rxi.rxi_rssi = head->rbh_level;
rxi.rxi_tstamp = letoh32(head->rbh_time);
ieee80211_input(ifp, m, ni, );
+   ieee80211_input_flush(ifp);
 
ieee80211_release_node(ic, ni);
} else {
blob - d3b9ada242c555f179dad1529c20a1748d4b7917
blob + d62e42897867623911ae53637503a42178637603
--- sys/dev/ic/an.c
+++ sys/dev/ic/an.c
@@ -477,6 +477,7 @@ an_rxeof(struct an_softc *sc)
rxi.rxi_rssi = frmhdr.an_rx_signal_strength;
rxi.rxi_tstamp = an_switch32(frmhdr.an_rx_time);
ieee80211_input(ifp, m, ni, );
+   ieee80211_input_flush(ifp);
ieee80211_release_node(ic, ni);
 }
 
blob - d72e8edceada8a680744a6b8478bb91ac9e15e6e
blob + 14dc84e692380a6a59b9fd9e3846f83e9a28e461
--- sys/dev/ic/ar5008.c
+++ sys/dev/ic/ar5008.c
@@ -960,7 +960,12 @@ ar5008_rx_process(struct athn_softc *sc)
 void
 ar5008_rx_intr(struct athn_softc *sc)
 {
+   struct ieee80211com *ic = >sc_ic;
+   struct ifnet *ifp = >ic_if;
+
while (ar5008_rx_process(sc) == 0);
+
+   ieee80211_input_flush(ifp);
 }
 
 int
blob - 69ade5ade5a35e632a025db327668d695a0edd2d
blob + c34c8dae63f268c4baa4b8be396e0a6f6af8d05b
--- sys/dev/ic/ar9003.c
+++ sys/dev/ic/ar9003.c
@@ -1066,7 +1066,12 @@ ar9003_rx_process(struct athn_softc *sc, int qid)
 void
 ar9003_rx_intr(struct athn_softc *sc, int qid)
 {
+   struct ieee80211com *ic = >sc_ic;
+   struct ifnet *ifp = >ic_if;
+
while (ar9003_rx_process(sc, qid) == 0);
+
+   ieee80211_input_flush(ifp);
 }
 
 int
blob - c0c5f4241b010c5c38a557d97963fbdbc884336d
blob + 4d481bf2aada58fd7f9c484e3564db5b3d96d2b0
--- sys/dev/ic/ath.c
+++ sys/dev/ic/ath.c
@@ -2005,6 +2005,8 @@ ath_rx_proc(void *arg, int npending)
TAILQ_INSERT_TAIL(>sc_rxbuf, bf, bf_list);
} while (ath_rxbuf_init(sc, bf) == 0);
 
+   ieee80211_input_flush(ifp);
+
ath_hal_set_rx_signal(ah);  /* rx signal state monitoring */
ath_hal_start_rx(ah);   /* in case of RXEOL */
 #undef PA2DESC
blob - cbf21da74007080c510fa9c56a58aedff62fa586
blob + 301b784e1b6878e0b024e17f7e515097fa6363d1
--- sys/dev/ic/atw.c
+++ sys/dev/ic/atw.c
@@ -3191,6 +3191,7 @@ atw_rxintr(struct atw_softc *sc)
 */
ieee80211_release_node(ic, ni);
}
+   ieee80211_input_flush(ifp);
 
/* Update the receive pointer. */
sc->sc_rxptr = i;
blob - 7d6f2c5a5693881e2dffdd1814a757c9563196fc
blob + 9ce7309aeb96e7282fc8ac28e3cfc633e1d9f592
--- sys/dev/ic/bwfm.c
+++ sys/dev/ic/bwfm.c
@@ -2118,6 +2118,7 @@ bwfm_rx_auth_ind(struct bwfm_softc *sc, struct bwfm_ev
rxi.rxi_rssi = 0;
rxi.rxi_tstamp = 0;
ieee80211_input(ifp, m, ic->ic_bss, );
+   ieee80211_input_flush(ifp);
 }
 
 void
@@ -2174,6 +2175,7 @@ bwfm_rx_assoc_ind(struct bwfm_softc *sc, struct