Re: remove IF_PREPEND in src/sys/dev/pci, was Re: IFQ_PREPEND

2015-11-08 Thread Miod Vallat
> i thought le(4) was the worst.

Of course not. le(4) is slow and has some quirks (and the earliest ones
can't do multicast correctly), but at least they don't collapse into fetal
position under load and don't need a reset to recover.



Re: remove IF_PREPEND in src/sys/dev/pci, was Re: IFQ_PREPEND

2015-11-08 Thread Stuart Henderson
On 2015/11/08 10:45, Miod Vallat wrote:
> > i thought le(4) was the worst.
> 
> Of course not. le(4) is slow and has some quirks (and the earliest ones
> can't do multicast correctly), but at least they don't collapse into fetal
> position under load and don't need a reset to recover.

I see you too have experienced the old vr(4)'s. The newer ones are less bad.
(I don't think I have any I can reach without going for a drive if they break
though..)



Re: remove IF_PREPEND in src/sys/dev/pci, was Re: IFQ_PREPEND

2015-11-08 Thread Fred

On 11/08/15 01:06, David Gwynne wrote:



On 8 Nov 2015, at 8:23 AM, Miod Vallat  wrote:


noone has a vr?


You can't expect people to use the crappiest Ethernet chip ever
designed.


theyre in the alix, surely someone has those still.

i thought le(4) was the worst.



Finally tested this patch on:

vr0 at pci0 dev 9 function 0 "VIA VT6105M RhineIII" rev 0x96: irq 10, 
address 00:0d:b9:17:d8:90
ukphy0 at vr0 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI 
0x004063, model 0x0034


graybox:fred ~> ifconfig vr0
vr0: flags=8843 mtu 1500
lladdr 00:0d:b9:17:d8:90
priority: 0
groups: egress
media: Ethernet autoselect (100baseTX full-duplex)
status: active
inet 192.168.5.7 netmask 0xff00 broadcast 192.168.5.255

And so far there has been no obvious regressions.

Thanks

Fred



Re: remove IF_PREPEND in src/sys/dev/pci, was Re: IFQ_PREPEND

2015-11-07 Thread David Gwynne

> On 6 Nov 2015, at 9:35 PM, David Gwynne  wrote:
> 
> On Wed, Nov 04, 2015 at 08:18:48AM +0100, Martin Pieuchot wrote:
>> On 04/11/15(Wed) 10:39, David Gwynne wrote:
>>> im working on making the interface send queue mpsafe.
>>> 
>>> part of that involced deprecating the IFQ_POLL api because it allows the 
>>> caller to get a reference an mbuf that is still on the send queue. this is 
>>> dangerous if another cpu tries to manipulate the send queue. instead code 
>>> should call IFQ_DEQUEUE, which takes it off the queue for the driver to use.
>>> 
>>> however, blindly changing code from IFQ_POLL to IFQ_DEQUEUE will
>>> cause unwanted packet loss when encapsulation fails in some cases,
>>> such as when the tx ring is already full. to cope, the easiest
>>> solution is to requeue the packet so the next call to the start
>>> routine can try fitting it on the ring again.
>>> 
>>> this introduces IFQ_PREPEND (cause we currently have IF_PREPEND)
>>> and works on top of both hfsc and priq because i added hfsc_requeue
>>> a while back.
>>> 
>>> this also converts uses of IF_PREPEND in drivers to IFQ_PREPEND.
>>> this improves the situation a bit if people have decided to use
>>> hfsc on these interfaces.
> 
> ok, so after talking to kenjiro cho about the problems with IFQ_PREPEND
> and arbitrary queuing disciplines, im taking a step back while
> thinking about how to approach the send queue stuff for a bit.
> however, deprecating IF_PREPEND is still necessary.
> 
> this tweaks the relevant drivers to not need IF_PREPEND. note that
> these are non-trivial changes, so i would like some review and maybe
> some actual testing? especially on vr, im sure there are a lot of
> users.
> 
> most of the changes are just shuffling conditionals around, but vr
> also includes a change to use m_defrag. im not sure that is enough
> to satisfy the alignment requirements the code discusses, so testing
> is necessary.  and at least one of age, alc, or ale.

noone has a vr?

> 
> ok?
> 
> Index: if_age.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_age.c,v
> retrieving revision 1.29
> diff -u -p -r1.29 if_age.c
> --- if_age.c  25 Oct 2015 13:04:28 -  1.29
> +++ if_age.c  6 Nov 2015 11:27:04 -
> @@ -89,7 +89,7 @@ voidage_dma_free(struct age_softc *);
> void  age_get_macaddr(struct age_softc *);
> void  age_phy_reset(struct age_softc *);
> 
> -int  age_encap(struct age_softc *, struct mbuf **);
> +int  age_encap(struct age_softc *, struct mbuf *);
> void  age_init_tx_ring(struct age_softc *);
> int   age_init_rx_ring(struct age_softc *);
> void  age_init_rr_ring(struct age_softc *);
> @@ -957,7 +957,7 @@ void
> age_start(struct ifnet *ifp)
> {
> struct age_softc *sc = ifp->if_softc;
> -struct mbuf *m_head;
> +struct mbuf *m;
>   int enq;
> 
>   if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
> @@ -969,8 +969,14 @@ age_start(struct ifnet *ifp)
> 
>   enq = 0;
>   for (;;) {
> - IFQ_DEQUEUE(>if_snd, m_head);
> - if (m_head == NULL)
> + if (sc->age_cdata.age_tx_cnt + AGE_MAXTXSEGS >=
> + AGE_TX_RING_CNT - 2) {
> + ifp->if_flags |= IFF_OACTIVE;
> + break;
> + }
> +
> + IFQ_DEQUEUE(>if_snd, m);
> + if (m == NULL)
>   break;
> 
>   /*
> @@ -978,14 +984,9 @@ age_start(struct ifnet *ifp)
>* don't have room, set the OACTIVE flag and wait
>* for the NIC to drain the ring.
>*/
> - if (age_encap(sc, _head)) {
> - if (m_head == NULL)
> - ifp->if_oerrors++;
> - else {
> - IF_PREPEND(>if_snd, m_head);
> - ifp->if_flags |= IFF_OACTIVE;
> - }
> - break;
> + if (age_encap(sc, m) != 0) {
> + ifp->if_oerrors++;
> + continue;
>   }
>   enq = 1;
> 
> @@ -995,7 +996,7 @@ age_start(struct ifnet *ifp)
>* to him.
>*/
>   if (ifp->if_bpf != NULL)
> - bpf_mtap_ether(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
> + bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
> #endif
>   }
> 
> @@ -1115,16 +1116,14 @@ age_mac_config(struct age_softc *sc)
> }
> 
> int
> -age_encap(struct age_softc *sc, struct mbuf **m_head)
> +age_encap(struct age_softc *sc, struct mbuf *m)
> {
>   struct age_txdesc *txd, *txd_last;
>   struct tx_desc *desc;
> - struct mbuf *m;
>   bus_dmamap_t map;
>   uint32_t cflags, poff, vtag;
>   int error, i, prod;
> 
> - m = *m_head;
>   cflags = vtag = 0;
>   poff = 0;
> 
> @@ -1133,27 +1132,20 @@ age_encap(struct age_softc 

Re: remove IF_PREPEND in src/sys/dev/pci, was Re: IFQ_PREPEND

2015-11-07 Thread Miod Vallat
> noone has a vr?

You can't expect people to use the crappiest Ethernet chip ever
designed.



Re: remove IF_PREPEND in src/sys/dev/pci, was Re: IFQ_PREPEND

2015-11-07 Thread David Gwynne

> On 8 Nov 2015, at 8:23 AM, Miod Vallat  wrote:
> 
>> noone has a vr?
> 
> You can't expect people to use the crappiest Ethernet chip ever
> designed.

theyre in the alix, surely someone has those still.

i thought le(4) was the worst.



remove IF_PREPEND in src/sys/dev/pci, was Re: IFQ_PREPEND

2015-11-06 Thread David Gwynne
On Wed, Nov 04, 2015 at 08:18:48AM +0100, Martin Pieuchot wrote:
> On 04/11/15(Wed) 10:39, David Gwynne wrote:
> > im working on making the interface send queue mpsafe.
> > 
> > part of that involced deprecating the IFQ_POLL api because it allows the 
> > caller to get a reference an mbuf that is still on the send queue. this is 
> > dangerous if another cpu tries to manipulate the send queue. instead code 
> > should call IFQ_DEQUEUE, which takes it off the queue for the driver to use.
> > 
> > however, blindly changing code from IFQ_POLL to IFQ_DEQUEUE will
> > cause unwanted packet loss when encapsulation fails in some cases,
> > such as when the tx ring is already full. to cope, the easiest
> > solution is to requeue the packet so the next call to the start
> > routine can try fitting it on the ring again.
> > 
> > this introduces IFQ_PREPEND (cause we currently have IF_PREPEND)
> > and works on top of both hfsc and priq because i added hfsc_requeue
> > a while back.
> > 
> > this also converts uses of IF_PREPEND in drivers to IFQ_PREPEND.
> > this improves the situation a bit if people have decided to use
> > hfsc on these interfaces.

ok, so after talking to kenjiro cho about the problems with IFQ_PREPEND
and arbitrary queuing disciplines, im taking a step back while
thinking about how to approach the send queue stuff for a bit.
however, deprecating IF_PREPEND is still necessary.

this tweaks the relevant drivers to not need IF_PREPEND. note that
these are non-trivial changes, so i would like some review and maybe
some actual testing? especially on vr, im sure there are a lot of
users.

most of the changes are just shuffling conditionals around, but vr
also includes a change to use m_defrag. im not sure that is enough
to satisfy the alignment requirements the code discusses, so testing
is necessary.  and at least one of age, alc, or ale.

ok?

Index: if_age.c
===
RCS file: /cvs/src/sys/dev/pci/if_age.c,v
retrieving revision 1.29
diff -u -p -r1.29 if_age.c
--- if_age.c25 Oct 2015 13:04:28 -  1.29
+++ if_age.c6 Nov 2015 11:27:04 -
@@ -89,7 +89,7 @@ void  age_dma_free(struct age_softc *);
 void   age_get_macaddr(struct age_softc *);
 void   age_phy_reset(struct age_softc *);
 
-intage_encap(struct age_softc *, struct mbuf **);
+intage_encap(struct age_softc *, struct mbuf *);
 void   age_init_tx_ring(struct age_softc *);
 intage_init_rx_ring(struct age_softc *);
 void   age_init_rr_ring(struct age_softc *);
@@ -957,7 +957,7 @@ void
 age_start(struct ifnet *ifp)
 {
 struct age_softc *sc = ifp->if_softc;
-struct mbuf *m_head;
+struct mbuf *m;
int enq;
 
if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
@@ -969,8 +969,14 @@ age_start(struct ifnet *ifp)
 
enq = 0;
for (;;) {
-   IFQ_DEQUEUE(>if_snd, m_head);
-   if (m_head == NULL)
+   if (sc->age_cdata.age_tx_cnt + AGE_MAXTXSEGS >=
+   AGE_TX_RING_CNT - 2) {
+   ifp->if_flags |= IFF_OACTIVE;
+   break;
+   }
+
+   IFQ_DEQUEUE(>if_snd, m);
+   if (m == NULL)
break;
 
/*
@@ -978,14 +984,9 @@ age_start(struct ifnet *ifp)
 * don't have room, set the OACTIVE flag and wait
 * for the NIC to drain the ring.
 */
-   if (age_encap(sc, _head)) {
-   if (m_head == NULL)
-   ifp->if_oerrors++;
-   else {
-   IF_PREPEND(>if_snd, m_head);
-   ifp->if_flags |= IFF_OACTIVE;
-   }
-   break;
+   if (age_encap(sc, m) != 0) {
+   ifp->if_oerrors++;
+   continue;
}
enq = 1;
 
@@ -995,7 +996,7 @@ age_start(struct ifnet *ifp)
 * to him.
 */
if (ifp->if_bpf != NULL)
-   bpf_mtap_ether(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
+   bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
 #endif
}
 
@@ -1115,16 +1116,14 @@ age_mac_config(struct age_softc *sc)
 }
 
 int
-age_encap(struct age_softc *sc, struct mbuf **m_head)
+age_encap(struct age_softc *sc, struct mbuf *m)
 {
struct age_txdesc *txd, *txd_last;
struct tx_desc *desc;
-   struct mbuf *m;
bus_dmamap_t map;
uint32_t cflags, poff, vtag;
int error, i, prod;
 
-   m = *m_head;
cflags = vtag = 0;
poff = 0;
 
@@ -1133,27 +1132,20 @@ age_encap(struct age_softc *sc, struct m
txd_last = txd;
map = txd->tx_dmamap;
 
-   error = bus_dmamap_load_mbuf(sc->sc_dmat, map, *m_head, BUS_DMA_NOWAIT);
+   error =