Re: bge(4) Jumbo support for newer chipsets

2014-09-02 Thread Mike Belopuhov
On 2 September 2014 03:54, Brad Smith b...@comstyle.com wrote:
 On Wed, Aug 27, 2014 at 02:25:27AM -0400, Brad Smith wrote:
 Looking for some testing of the following diff to add Jumbo support for the
 BCM5714 / BCM5780 and BCM5717 / BCM5719 / BCM5720 / BCM57765 / BCM57766
 chipsets.

 Here is an updated diff with bge_rxrinfo() being fixed.


get it in.  OK mikeb



Re: bge(4) Jumbo support for newer chipsets

2014-09-01 Thread David Gwynne
if no one else is going to try this then i think it should go in.

On 28 Aug 2014, at 20:32, David Gwynne da...@gwynne.id.au wrote:

 
 On 28 Aug 2014, at 3:02 am, Mike Belopuhov m...@belopuhov.com wrote:
 
 On 27 August 2014 08:25, Brad Smith b...@comstyle.com wrote:
 Looking for some testing of the following diff to add Jumbo support for the
 BCM5714 / BCM5780 and BCM5717 / BCM5719 / BCM5720 / BCM57765 / BCM57766
 chipsets.
 
 
 
 i have tested this on Broadcom BCM5719 rev 0x01, unknown BCM5719 
 (0x5719001),
 APE firmware NCSI 1.1.15.0 and Broadcom BCM5714 rev 0xa3, BCM5715
 A3 (0x9003).
 
 it works, however i'm not strictly a fan of switching the cluster pool to
 a larger one for 5714.  wasting another 8k page (on sparc for example) for
 every rx cluster in 90% of cases sounds kinda wrong to me.  but ymmv.
 
 this is what MCLGETI was invented to solve though. comparing pre mclgeti to 
 what this does:
 
 a 5714 right now without jumbos would have 512 ring entries with 2048 bytes
 on each. 2048 * 512 is 1024k of ram. if we bumped the std ring up to jumbos
 by default, 9216 * 512 would eat 4608k of ram.
 
 my boxes with bge with mclgeti generally sit around 40 clusters, but 
 sometimes end up around 80. 80 * 9216 is 720k. we can have jumbos and still 
 be ahead.
 
 if you compare the nics with split rings: 512 * 2048 + 256 * 9216 is ~3.3M. 
 the same chip with mclgeti and only doing a 1500 byte workload would be 80 * 
 2048 + 17 * 9216, or 300k.
 
 
 apart from that there's a deficiency in the diff itself.  you probably want
 to change MCLBYTES in bge_rxrinfo to bge_rx_std_len otherwise statistics
 look wrong.
 
 yeah.
 
 i have tested both 1500 and 9000 mtus on a 5714 and it is working well. as 
 you say, 5719 seems to be fine too, but ive only tested it with mtu 1500. ill 
 test 9k tomorrow.
 
 it needs tests on older chips too though.




Re: bge(4) Jumbo support for newer chipsets

2014-09-01 Thread Brad Smith
On Wed, Aug 27, 2014 at 02:25:27AM -0400, Brad Smith wrote:
 Looking for some testing of the following diff to add Jumbo support for the
 BCM5714 / BCM5780 and BCM5717 / BCM5719 / BCM5720 / BCM57765 / BCM57766
 chipsets.

Here is an updated diff with bge_rxrinfo() being fixed.


Index: if_bge.c
===
RCS file: /home/cvs/src/sys/dev/pci/if_bge.c,v
retrieving revision 1.360
diff -u -p -u -p -r1.360 if_bge.c
--- if_bge.c  26 Aug 2014 11:01:21 -0000  1.360
+++ if_bge.c  2 Sep 2014 01:50:30 -0000
@@ -1117,10 +1117,10 @@ bge_newbuf(struct bge_softc *sc, int i)
struct mbuf *m;
int error;
 
-   m = MCLGETI(NULL, M_DONTWAIT, NULL, MCLBYTES);
+   m = MCLGETI(NULL, M_DONTWAIT, NULL, sc->bge_rx_std_len);
if (!m)
return (ENOBUFS);
-   m->m_len = m->m_pkthdr.len = MCLBYTES;
+   m->m_len = m->m_pkthdr.len = sc->bge_rx_std_len;
if (!(sc->bge_flags & BGE_RX_ALIGNBUG))
m_adj(m, ETHER_ALIGN);
 
@@ -1241,8 +1241,8 @@ bge_init_rx_ring_std(struct bge_softc *s
return (0);
 
for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
-   if (bus_dmamap_create(sc->bge_dmatag, MCLBYTES, 1, MCLBYTES, 0,
-   BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,
+   if (bus_dmamap_create(sc->bge_dmatag, sc->bge_rx_std_len, 1,
+   sc->bge_rx_std_len, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,
&sc->bge_cdata.bge_rx_std_map[i]) != 0) {
printf("%s: unable to create dmamap for slot %d\n",
sc->bge_dev.dv_xname, i);
@@ -1485,6 +1485,7 @@ bge_init_tx_ring(struct bge_softc *sc)
 {
int i;
bus_dmamap_t dmamap;
+   bus_size_t txsegsz, txmaxsegsz;
struct txdmamap_pool_entry *dma;
 
if (sc->bge_flags & BGE_TXRING_VALID)
@@ -1504,11 +1505,18 @@ bge_init_tx_ring(struct bge_softc *sc)
if (BGE_CHIPREV(sc->bge_chipid) == BGE_CHIPREV_5700_BX)
bge_writembx(sc, BGE_MBX_TX_NIC_PROD0_LO, 0);
 
+   if (BGE_IS_JUMBO_CAPABLE(sc)) {
+   txsegsz = 4096;
+   txmaxsegsz = BGE_JLEN;
+   } else {
+   txsegsz = MCLBYTES;
+   txmaxsegsz = MCLBYTES;
+   }
+
SLIST_INIT(&sc->txdma_list);
for (i = 0; i < BGE_TX_RING_CNT; i++) {
-   if (bus_dmamap_create(sc->bge_dmatag, BGE_JLEN,
-   BGE_NTXSEG, BGE_JLEN, 0, BUS_DMA_NOWAIT,
-   &dmamap))
+   if (bus_dmamap_create(sc->bge_dmatag, txmaxsegsz,
+   BGE_NTXSEG, txsegsz, 0, BUS_DMA_NOWAIT, &dmamap))
return (ENOBUFS);
if (dmamap == NULL)
panic("dmamap NULL in bge_init_tx_ring");
@@ -2001,7 +2009,7 @@ bge_blockinit(struct bge_softc *sc)
 * using this ring (i.e. once we set the MTU
 * high enough to require it).
 */
-   if (BGE_IS_JUMBO_CAPABLE(sc)) {
+   if (sc->bge_flags & BGE_JUMBO_RING) {
rcb = &sc->bge_rdata->bge_info.bge_jumbo_rx_rcb;
BGE_HOSTADDR(rcb->bge_hostaddr,
BGE_RING_DMA_ADDR(sc, bge_rx_jumbo_ring));
@@ -2065,7 +2073,7 @@ bge_blockinit(struct bge_softc *sc)
 * to work around HW bugs.
 */
CSR_WRITE_4(sc, BGE_RBDI_STD_REPL_THRESH, 8);
-   if (BGE_IS_JUMBO_CAPABLE(sc))
+   if (sc->bge_flags & BGE_JUMBO_RING)
CSR_WRITE_4(sc, BGE_RBDI_JUMBO_REPL_THRESH, 8);
 
if (BGE_IS_5717_PLUS(sc)) {
@@ -2699,7 +2707,8 @@ bge_attach(struct device *parent, struct
case BGE_ASICREV_BCM5719:
case BGE_ASICREV_BCM5720:
sc->bge_flags |= BGE_5717_PLUS | BGE_5755_PLUS | BGE_575X_PLUS |
-   BGE_5705_PLUS;
+   BGE_5705_PLUS | BGE_JUMBO_CAPABLE | BGE_JUMBO_RING |
+   BGE_JUMBO_FRAME;
if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5719 ||
BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5720) {
/*
@@ -2707,6 +2716,13 @@ bge_attach(struct device *parent, struct
 * of TXMBUF available space.
 */
sc->bge_flags |= BGE_RDMA_BUG;
+
+   if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5719 &&
+   sc->bge_chipid == BGE_CHIPID_BCM5719_A0) {
+   /* Jumbo frame on BCM5719 A0 does not work. */
+   sc->bge_flags &= ~(BGE_JUMBO_CAPABLE |
+   BGE_JUMBO_RING | BGE_JUMBO_FRAME);
+   }
}
break;
case BGE_ASICREV_BCM5755:
@@ -2721,12 +2737,12 @@ bge_attach(struct device *parent, struct
case BGE_ASICREV_BCM5701:
case BGE_ASICREV_BCM5703:
case BGE_ASICREV_BCM5704:
-   sc->bge_flags |= BGE_5700_FAMILY | BGE_JUMBO_CAPABLE;
+   sc->bge_flags |= BGE_5700_FAMILY | BGE_JUMBO_CAPABLE |
+   BGE_JUMBO_RING;

Re: bge(4) Jumbo support for newer chipsets

2014-08-28 Thread David Gwynne

On 28 Aug 2014, at 3:02 am, Mike Belopuhov m...@belopuhov.com wrote:

 On 27 August 2014 08:25, Brad Smith b...@comstyle.com wrote:
 Looking for some testing of the following diff to add Jumbo support for the
 BCM5714 / BCM5780 and BCM5717 / BCM5719 / BCM5720 / BCM57765 / BCM57766
 chipsets.
 
 
 
 i have tested this on Broadcom BCM5719 rev 0x01, unknown BCM5719 
 (0x5719001),
 APE firmware NCSI 1.1.15.0 and Broadcom BCM5714 rev 0xa3, BCM5715
 A3 (0x9003).
 
 it works, however i'm not strictly a fan of switching the cluster pool to
 a larger one for 5714.  wasting another 8k page (on sparc for example) for
 every rx cluster in 90% of cases sounds kinda wrong to me.  but ymmv.

this is what MCLGETI was invented to solve though. comparing pre mclgeti to 
what this does:

a 5714 right now without jumbos would have 512 ring entries with 2048 bytes on
each. 2048 * 512 is 1024k of ram. if we bumped the std ring up to jumbos by
default, 9216 * 512 would eat 4608k of ram.

my boxes with bge with mclgeti generally sit around 40 clusters, but sometimes 
end up around 80. 80 * 9216 is 720k. we can have jumbos and still be ahead.

if you compare the nics with split rings: 512 * 2048 + 256 * 9216 is ~3.3M. the 
same chip with mclgeti and only doing a 1500 byte workload would be 80 * 2048 + 
17 * 9216, or 300k.
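
a quick back-of-the-envelope check of the figures above (standalone C; the
ring counts and cluster sizes are just the numbers quoted in this thread,
not read from the driver):

#include <stdio.h>

#define STD_RING_CNT    512     /* BGE_STD_RX_RING_CNT */
#define JUMBO_RING_CNT  256     /* jumbo ring size quoted above */
#define CL_2K           2048    /* MCLBYTES */
#define CL_9K           9216    /* jumbo cluster */

int
main(void)
{
    /* pre-MCLGETI: every ring slot permanently holds a cluster */
    printf("std ring, 2k clusters:   %dk\n", STD_RING_CNT * CL_2K / 1024);
    printf("std ring, 9k clusters:   %dk\n", STD_RING_CNT * CL_9K / 1024);
    printf("split rings, 2k + 9k:    %dk\n",
        (STD_RING_CNT * CL_2K + JUMBO_RING_CNT * CL_9K) / 1024);

    /* with MCLGETI the ring sits around 80 clusters under load */
    printf("mclgeti, 80 9k clusters: %dk\n", 80 * CL_9K / 1024);
    printf("mclgeti, 1500-byte load: %dk\n",
        (80 * CL_2K + 17 * CL_9K) / 1024);
    return (0);
}

this prints 1024k, 4608k, 3328k (~3.3M), 720k and 313k (~300k), matching the
arithmetic above.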

 
 apart from that there's a deficiency in the diff itself.  you probably want
 to change MCLBYTES in bge_rxrinfo to bge_rx_std_len otherwise statistics
 look wrong.

yeah.

i have tested both 1500 and 9000 mtus on a 5714 and it is working well. as you 
say, 5719 seems to be fine too, but ive only tested it with mtu 1500. ill test 
9k tomorrow.

it needs tests on older chips too though.



Re: bge(4) Jumbo support for newer chipsets

2014-08-28 Thread Mike Belopuhov
On 28 August 2014 12:32, David Gwynne da...@gwynne.id.au wrote:

 On 28 Aug 2014, at 3:02 am, Mike Belopuhov m...@belopuhov.com wrote:

 On 27 August 2014 08:25, Brad Smith b...@comstyle.com wrote:
 Looking for some testing of the following diff to add Jumbo support for the
 BCM5714 / BCM5780 and BCM5717 / BCM5719 / BCM5720 / BCM57765 / BCM57766
 chipsets.



 i have tested this on Broadcom BCM5719 rev 0x01, unknown BCM5719 
 (0x5719001),
 APE firmware NCSI 1.1.15.0 and Broadcom BCM5714 rev 0xa3, BCM5715
 A3 (0x9003).

 it works, however i'm not strictly a fan of switching the cluster pool to
 a larger one for 5714.  wasting another 8k page (on sparc for example) for
 every rx cluster in 90% of cases sounds kinda wrong to me.  but ymmv.

 this is what MCLGETI was invented to solve though. comparing pre mclgeti to 
 what this does:


that doesn't make my point invalid though.

 a 5714 right now without jumbos would have 512 ring entries with 2048 bytes
 on each. 2048 * 512 is 1024k of ram. if we bumped the std ring up to jumbos
 by default, 9216 * 512 would eat 4608k of ram.


your calculation is a bit off.  it's not 9216 * 512, in the case of sparc64
it's 8k * 2 * 512 which is 8M.

but my concern is different:  you ask uvm to do more work for every cluster
since now you need 2 consecutive pages of memory for one cluster instead of
just one that fits 8k/2k = 4 clusters.
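
the sparc64 figure spelled out (a standalone sketch, assuming each 9k
cluster is backed by two contiguous 8k pages as described above):

#include <stdio.h>

int
main(void)
{
    size_t page = 8192;         /* sparc64 PAGE_SIZE */
    size_t percl = 2 * page;    /* a 9216-byte cluster spans two pages */
    size_t ring = 512;          /* BGE_STD_RX_RING_CNT */

    /* 16k per cluster * 512 ring entries = 8M */
    printf("%zuM\n", percl * ring >> 20);
    return (0);
}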

 my boxes with bge with mclgeti generally sit around 40 clusters, but 
 sometimes end up around 80. 80 * 9216 is 720k. we can have jumbos and still 
 be ahead.

 if you compare the nics with split rings: 512 * 2048 + 256 * 9216 is ~3.3M. 
 the same chip with mclgeti and only doing a 1500 byte workload would be 80 * 
 2048 + 17 * 9216, or 300k.


 apart from that there's a deficiency in the diff itself.  you probably want
 to change MCLBYTES in bge_rxrinfo to bge_rx_std_len otherwise statistics
 look wrong.

 yeah.

 i have tested both 1500 and 9000 mtus on a 5714 and it is working well. as 
 you say, 5719 seems to be fine too, but ive only tested it with mtu 1500. ill 
 test 9k tomorrow.

 it needs tests on older chips too though.



Re: bge(4) Jumbo support for newer chipsets

2014-08-28 Thread David Gwynne

On 28 Aug 2014, at 11:05 pm, Mike Belopuhov m...@belopuhov.com wrote:

 On 28 August 2014 12:32, David Gwynne da...@gwynne.id.au wrote:
 
 On 28 Aug 2014, at 3:02 am, Mike Belopuhov m...@belopuhov.com wrote:
 
 On 27 August 2014 08:25, Brad Smith b...@comstyle.com wrote:
 Looking for some testing of the following diff to add Jumbo support for the
 BCM5714 / BCM5780 and BCM5717 / BCM5719 / BCM5720 / BCM57765 / BCM57766
 chipsets.
 
 
 
 i have tested this on Broadcom BCM5719 rev 0x01, unknown BCM5719 
 (0x5719001),
 APE firmware NCSI 1.1.15.0 and Broadcom BCM5714 rev 0xa3, BCM5715
 A3 (0x9003).
 
 it works, however i'm not strictly a fan of switching the cluster pool to
 a larger one for 5714.  wasting another 8k page (on sparc for example) for
 every rx cluster in 90% of cases sounds kinda wrong to me.  but ymmv.
 
 this is what MCLGETI was invented to solve though. comparing pre mclgeti to 
 what this does:
 
 
 that doesn't make my point invalid though.
 
 a 5714 right now without jumbos would have 512 ring entries with 2048 bytes
 on each. 2048 * 512 is 1024k of ram. if we bumped the std ring up to
 jumbos by default, 9216 * 512 would eat 4608k of ram.
 
 
 your calculation is a bit off.  it's not 9216 * 512, in the case of sparc64
 it's 8k * 2 * 512 which is 8M.

on archs with 4k pages art's large pool code puts 9k frames on 12k pages, so 3k
of waste per cluster. on archs with 8k pages 9k clusters land on 64k pages, so
the memory waste per 9k cluster is about 146 bytes. on archs with 16k pages
you get 7k of waste per 9k cluster.

im proposing that the large page code in pools change so it puts at least 8 
items on a page, and pages are always powers of two. that would mean 9k 
clusters would always end up on a 128k page, which works out so every arch only 
gets the 146 bytes of waste per 9k cluster. less if i put the pool page headers 
on the same page.
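
checking those waste figures (a standalone calculation; the items-per-page
counts follow from the page and cluster sizes given above, this is not the
pool code itself):

#include <stdio.h>

#define CL_9K   9216

/* bytes wasted per cluster when `items` 9k clusters share a pool page */
static void
waste(const char *layout, unsigned long pgsz, unsigned long items)
{
    printf("%-24s %lu bytes/cluster\n", layout,
        (pgsz - items * CL_9K) / items);
}

int
main(void)
{
    waste("4k archs, 12k pages:", 12288, 1);    /* 3072 (3k) */
    waste("8k archs, 64k pages:", 65536, 7);    /* ~146      */
    waste("16k archs, 16k pages:", 16384, 1);   /* 7168 (7k) */
    /* the proposal: >= 8 items, power-of-two pages -> 128k, 14 items */
    waste("proposed 128k pages:", 131072, 14);  /* ~146      */
    return (0);
}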

 but my concern is different:  you ask uvm to do more work for every cluster
 since now you need 2 consecutive pages of memory for one cluster instead of
 just one that fits 8k/2k = 4 clusters.

see above.

it is also worth noting that the current mbuf cluster allocator sets a low 
watermark that for a lot of workloads means we never return clusters to uvm.

my proposed code changes would get rid of that low watermark, but would age 
fully free pages so theyre only returned to uvm if theyve been idle for a 
second. if you have a fairly consistent workload you dont move pages in and out 
of uvm a lot.

on my production firewalls, the result of the above (128k pages for 9k clusters 
and free page idling) is that i have allocated mbuf clusters 74009152525 times,
but
only allocated pages 8172372 times and returned pages to uvm 8172233 times. 
that works out to be about 9000 uses of the pages per uvm allocation. that 
particular box has been up for a fortnight so some of those counters may have 
wrapped, so take the numbers with a grain of salt.
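
a minimal sketch of that idling rule (hypothetical types and names, not the
actual pool implementation):

#include <time.h>

struct pool_page {
    unsigned int pp_nused;      /* items still allocated from this page */
    time_t       pp_idle_since; /* when the page last became fully free */
};

/* only hand a page back to uvm once it has sat fully free for a second */
static int
pool_page_reclaimable(const struct pool_page *pp, time_t now)
{
    return (pp->pp_nused == 0 && now - pp->pp_idle_since >= 1);
}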

 
 my boxes with bge with mclgeti generally sit around 40 clusters, but 
 sometimes end up around 80. 80 * 9216 is 720k. we can have jumbos and still 
 be ahead.
 
 if you compare the nics with split rings: 512 * 2048 + 256 * 9216 is ~3.3M. 
 the same chip with mclgeti and only doing a 1500 byte workload would be 80 * 
 2048 + 17 * 9216, or 300k.
 
 
 apart from that there's a deficiency in the diff itself.  you probably want
 to change MCLBYTES in bge_rxrinfo to bge_rx_std_len otherwise statistics
 look wrong.
 
 yeah.
 
 i have tested both 1500 and 9000 mtus on a 5714 and it is working well. as 
 you say, 5719 seems to be fine too, but ive only tested it with mtu 1500. 
 ill test 9k tomorrow.
 
 it needs tests on older chips too though.




bge(4) Jumbo support for newer chipsets

2014-08-27 Thread Brad Smith
Looking for some testing of the following diff to add Jumbo support for the
BCM5714 / BCM5780 and BCM5717 / BCM5719 / BCM5720 / BCM57765 / BCM57766
chipsets.


Index: if_bge.c
===
RCS file: /home/cvs/src/sys/dev/pci/if_bge.c,v
retrieving revision 1.360
diff -u -p -u -p -r1.360 if_bge.c
--- if_bge.c  26 Aug 2014 11:01:21 -0000  1.360
+++ if_bge.c  27 Aug 2014 00:48:45 -0000
@@ -1117,10 +1117,10 @@ bge_newbuf(struct bge_softc *sc, int i)
struct mbuf *m;
int error;
 
-   m = MCLGETI(NULL, M_DONTWAIT, NULL, MCLBYTES);
+   m = MCLGETI(NULL, M_DONTWAIT, NULL, sc->bge_rx_std_len);
if (!m)
return (ENOBUFS);
-   m->m_len = m->m_pkthdr.len = MCLBYTES;
+   m->m_len = m->m_pkthdr.len = sc->bge_rx_std_len;
if (!(sc->bge_flags & BGE_RX_ALIGNBUG))
m_adj(m, ETHER_ALIGN);
 
@@ -1241,8 +1241,8 @@ bge_init_rx_ring_std(struct bge_softc *s
return (0);
 
for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
-   if (bus_dmamap_create(sc->bge_dmatag, MCLBYTES, 1, MCLBYTES, 0,
-   BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,
+   if (bus_dmamap_create(sc->bge_dmatag, sc->bge_rx_std_len, 1,
+   sc->bge_rx_std_len, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,
&sc->bge_cdata.bge_rx_std_map[i]) != 0) {
printf("%s: unable to create dmamap for slot %d\n",
sc->bge_dev.dv_xname, i);
@@ -1485,6 +1485,7 @@ bge_init_tx_ring(struct bge_softc *sc)
 {
int i;
bus_dmamap_t dmamap;
+   bus_size_t txsegsz, txmaxsegsz;
struct txdmamap_pool_entry *dma;
 
if (sc->bge_flags & BGE_TXRING_VALID)
@@ -1504,11 +1505,18 @@ bge_init_tx_ring(struct bge_softc *sc)
if (BGE_CHIPREV(sc->bge_chipid) == BGE_CHIPREV_5700_BX)
bge_writembx(sc, BGE_MBX_TX_NIC_PROD0_LO, 0);
 
+   if (BGE_IS_JUMBO_CAPABLE(sc)) {
+   txsegsz = 4096;
+   txmaxsegsz = BGE_JLEN;
+   } else {
+   txsegsz = MCLBYTES;
+   txmaxsegsz = MCLBYTES;
+   }
+
SLIST_INIT(&sc->txdma_list);
for (i = 0; i < BGE_TX_RING_CNT; i++) {
-   if (bus_dmamap_create(sc->bge_dmatag, BGE_JLEN,
-   BGE_NTXSEG, BGE_JLEN, 0, BUS_DMA_NOWAIT,
-   &dmamap))
+   if (bus_dmamap_create(sc->bge_dmatag, txmaxsegsz,
+   BGE_NTXSEG, txsegsz, 0, BUS_DMA_NOWAIT, &dmamap))
return (ENOBUFS);
if (dmamap == NULL)
panic("dmamap NULL in bge_init_tx_ring");
@@ -2001,7 +2009,7 @@ bge_blockinit(struct bge_softc *sc)
 * using this ring (i.e. once we set the MTU
 * high enough to require it).
 */
-   if (BGE_IS_JUMBO_CAPABLE(sc)) {
+   if (sc->bge_flags & BGE_JUMBO_RING) {
rcb = &sc->bge_rdata->bge_info.bge_jumbo_rx_rcb;
BGE_HOSTADDR(rcb->bge_hostaddr,
BGE_RING_DMA_ADDR(sc, bge_rx_jumbo_ring));
@@ -2065,7 +2073,7 @@ bge_blockinit(struct bge_softc *sc)
 * to work around HW bugs.
 */
CSR_WRITE_4(sc, BGE_RBDI_STD_REPL_THRESH, 8);
-   if (BGE_IS_JUMBO_CAPABLE(sc))
+   if (sc->bge_flags & BGE_JUMBO_RING)
CSR_WRITE_4(sc, BGE_RBDI_JUMBO_REPL_THRESH, 8);
 
if (BGE_IS_5717_PLUS(sc)) {
@@ -2699,7 +2707,8 @@ bge_attach(struct device *parent, struct
case BGE_ASICREV_BCM5719:
case BGE_ASICREV_BCM5720:
sc->bge_flags |= BGE_5717_PLUS | BGE_5755_PLUS | BGE_575X_PLUS |
-   BGE_5705_PLUS;
+   BGE_5705_PLUS | BGE_JUMBO_CAPABLE | BGE_JUMBO_RING |
+   BGE_JUMBO_FRAME;
if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5719 ||
BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5720) {
/*
@@ -2707,6 +2716,13 @@ bge_attach(struct device *parent, struct
 * of TXMBUF available space.
 */
sc->bge_flags |= BGE_RDMA_BUG;
+
+   if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5719 &&
+   sc->bge_chipid == BGE_CHIPID_BCM5719_A0) {
+   /* Jumbo frame on BCM5719 A0 does not work. */
+   sc->bge_flags &= ~(BGE_JUMBO_CAPABLE |
+   BGE_JUMBO_RING | BGE_JUMBO_FRAME);
+   }
}
break;
case BGE_ASICREV_BCM5755:
@@ -2721,12 +2737,12 @@ bge_attach(struct device *parent, struct
case BGE_ASICREV_BCM5701:
case BGE_ASICREV_BCM5703:
case BGE_ASICREV_BCM5704:
-   sc->bge_flags |= BGE_5700_FAMILY | BGE_JUMBO_CAPABLE;
+   sc->bge_flags |= BGE_5700_FAMILY | BGE_JUMBO_CAPABLE |
+   BGE_JUMBO_RING;

Re: bge(4) Jumbo support for newer chipsets

2014-08-27 Thread Mike Belopuhov
On 27 August 2014 08:25, Brad Smith b...@comstyle.com wrote:
 Looking for some testing of the following diff to add Jumbo support for the
 BCM5714 / BCM5780 and BCM5717 / BCM5719 / BCM5720 / BCM57765 / BCM57766
 chipsets.



i have tested this on Broadcom BCM5719 rev 0x01, unknown BCM5719 (0x5719001),
APE firmware NCSI 1.1.15.0 and Broadcom BCM5714 rev 0xa3, BCM5715
A3 (0x9003).

it works, however i'm not strictly a fan of switching the cluster pool to a
larger one for 5714.  wasting another 8k page (on sparc for example) for
every rx cluster in 90% of cases sounds kinda wrong to me.  but ymmv.

apart from that there's a deficiency in the diff itself.  you probably want
to change MCLBYTES in bge_rxrinfo to bge_rx_std_len otherwise statistics
look wrong.

i'm certainly OK with the !5714 part of the diff.
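
for reference, a minimal sketch of the bge_rxrinfo() fix suggested above,
assuming the driver reports its rx ring through if_rxr_info_ioctl(); the
softc member names here are assumptions, not a quote of the actual code:

int
bge_rxrinfo(struct bge_softc *sc, struct if_rxrinfo *ifri)
{
    struct if_rxring_info ifr;

    memset(&ifr, 0, sizeof(ifr));
    /* report the cluster size the std ring actually uses */
    ifr.ifr_size = sc->bge_rx_std_len;  /* was: MCLBYTES */
    ifr.ifr_info = sc->bge_std_ring;    /* assumed if_rxring member */

    return (if_rxr_info_ioctl(ifri, 1, &ifr));
}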